📊 Discretizing Continuous Data for Dynamic Bayesian Networks¶

Import useful libraries

In [ ]:
import os
import glob
import numpy as np
import pandas as pd

import logging
# Raise the root logger threshold so that only CRITICAL records pass by default.
logging.getLogger().setLevel(logging.CRITICAL)
# Give the "matplotlib" logger its own explicit WARNING threshold; loggers below it
# (e.g. "matplotlib.category") inherit this level unless they set one themselves.
logging.getLogger("matplotlib").setLevel(logging.WARNING)

import warnings
# Silence every Python warning for the rest of the session.
# NOTE(review): this blanket filter also hides deprecation and dtype warnings
# raised by pandas/numpy — confirm that is intended.
warnings.filterwarnings('ignore')

from utilities import DataVisualizer, DataDiscretizer, DataProcessor, DataEncoder

Define basic folder paths

In [2]:
# Define folder names.
# The data root is assembled with os.path.join so the path separator matches the
# host OS: a hard-coded r".\data" only resolves on Windows, while on POSIX it
# would name a literal entry called ".\data". On Windows the value is unchanged.
DATA_FOLDER_NAME = os.path.join(os.curdir, "data")

# Per-session source datasets and the folder for their discretized counterparts
ORIGINAL_DATASETS_IMOLA_FOLDER_NAME = os.path.join(DATA_FOLDER_NAME, "datasets-imola")
DISCRETIZED_ORIGINAL_DATASETS_IMOLA_FOLDER_NAME = os.path.join(DATA_FOLDER_NAME, "discretized-datasets-imola")

# Aggregated datasets and the folder for their discretized counterparts
AGGREGATED_DATASETS_IMOLA_FOLDER_NAME = os.path.join(DATA_FOLDER_NAME, "aggregated-datasets-imola")
DISCRETIZED_AGGREGATED_DATASETS_IMOLA_FOLDER_NAME = os.path.join(DATA_FOLDER_NAME, "discretized-aggregated-datasets-imola")

🔔 Analyzing data distributions¶

In [3]:
# Get all CSV dataset files from the specified folder.
# glob.glob() returns matches in a file-system-dependent order, so the result is
# sorted to keep the list (and anything iterating over it) reproducible.
datasets_imola = sorted(glob.glob(os.path.join(ORIGINAL_DATASETS_IMOLA_FOLDER_NAME, "*.csv")))

print(f"📂 Found {len(datasets_imola)} datasets in '{ORIGINAL_DATASETS_IMOLA_FOLDER_NAME}'")
📂 Found 3 datasets in '.\data\datasets-imola'

🔷 Analyzing 20241128-imola dataset¶

In [4]:
# Select the dataset file to analyze.
# The base name is compared exactly (instead of a substring test on the whole
# path) so that a look-alike file or folder name can never be picked by mistake.
dataset_path = next(
    (file for file in datasets_imola if os.path.basename(file) == '20241128-imola.csv'),
    None
)

if dataset_path:
    print(f"📂 Loading dataset: {dataset_path}")

    # Load the dataset into a DataFrame
    imola_20241128_df = pd.read_csv(dataset_path)

    # Display basic dataset information.
    # DataFrame.info() writes its report to stdout and returns None, so it is
    # called directly; wrapping it in print() would add a stray "None" line.
    print("\n🔍 Dataset Overview:")
    imola_20241128_df.info()

    print("\n📊 First 5 Rows:")
    display(imola_20241128_df.head())

    print("\n📈 Summary Statistics:")
    display(imola_20241128_df.describe())

    # Get distribution overview with resulting bins.
    # Every column except the 'InverterFault' flag is analyzed; Index.difference
    # returns the remaining names sorted, hence the alphabetical report order.
    bins_result = DataVisualizer.plot_distributions_overview(
        df=imola_20241128_df,
        columns=imola_20241128_df.columns.difference(['InverterFault']).tolist(),
        bins='auto',
        quantiles=[0, 0.1, 0.25, 0.5, 0.75, 0.9, 1.0]
    )

    # Print bin suggestions in a formatted way
    print("\n📌 Bin Distributions info:")
    for column, stats in bins_result.items():
        print(f"\n🔹 {column}:")
        for key, value in stats.items():
            print(f"   - {key}: {value}")

else:
    print("⚠ Dataset '20241128-imola.csv' not found in the specified folder.")
📂 Loading dataset: .\data\datasets-imola\20241128-imola.csv

🔍 Dataset Overview:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 248448 entries, 0 to 248447
Data columns (total 14 columns):
 #   Column                       Non-Null Count   Dtype  
---  ------                       --------------   -----  
 0   BatteryVoltage_V             248448 non-null  float64
 1   BatteryCurrent_A             248448 non-null  float64
 2   BatteryPackTemp_C            248448 non-null  float64
 3   InverterFault                248448 non-null  int64  
 4   InverterSpeed_RearLeft_RPM   248448 non-null  float64
 5   Inverter_Iq_Ref_RearLeft_A   248448 non-null  float64
 6   Inverter_Id_Ref_RearLeft_A   248448 non-null  float64
 7   MotorTemp_RearLeft_C         248448 non-null  float64
 8   InverterTemp_RearLeft_C      248448 non-null  float64
 9   InverterSpeed_RearRight_RPM  248448 non-null  float64
 10  Inverter_Iq_Ref_RearRight_A  248448 non-null  float64
 11  Inverter_Id_Ref_RearRight_A  248448 non-null  float64
 12  MotorTemp_RearRight_C        248448 non-null  float64
 13  InverterTemp_RearRight_C     248448 non-null  float64
dtypes: float64(13), int64(1)
memory usage: 26.5 MB
None

📊 First 5 Rows:
BatteryVoltage_V BatteryCurrent_A BatteryPackTemp_C InverterFault InverterSpeed_RearLeft_RPM Inverter_Iq_Ref_RearLeft_A Inverter_Id_Ref_RearLeft_A MotorTemp_RearLeft_C InverterTemp_RearLeft_C InverterSpeed_RearRight_RPM Inverter_Iq_Ref_RearRight_A Inverter_Id_Ref_RearRight_A MotorTemp_RearRight_C InverterTemp_RearRight_C
0 504.26001 0.00 13.9 0 0.0 0.0 0.0 13.0 15.0 0.0 0.0 0.0 14.0 15.0
1 504.26001 0.00 13.9 0 0.0 0.0 0.0 13.0 15.0 0.0 0.0 0.0 14.0 15.0
2 504.26001 0.00 13.9 0 0.0 0.0 0.0 13.0 15.0 0.0 0.0 0.0 14.0 15.0
3 504.26001 -0.03 13.9 0 0.0 0.0 0.0 13.0 15.0 0.0 0.0 0.0 14.0 15.0
4 504.26001 -0.03 13.9 0 0.0 0.0 0.0 13.0 15.0 0.0 0.0 0.0 14.0 15.0
📈 Summary Statistics:
BatteryVoltage_V BatteryCurrent_A BatteryPackTemp_C InverterFault InverterSpeed_RearLeft_RPM Inverter_Iq_Ref_RearLeft_A Inverter_Id_Ref_RearLeft_A MotorTemp_RearLeft_C InverterTemp_RearLeft_C InverterSpeed_RearRight_RPM Inverter_Iq_Ref_RearRight_A Inverter_Id_Ref_RearRight_A MotorTemp_RearRight_C InverterTemp_RearRight_C
count 248448.000000 248448.000000 248448.000000 248448.0 248448.000000 248448.000000 248448.000000 248448.000000 248448.000000 248448.000000 248448.000000 248448.000000 248448.000000 248448.000000
mean 470.132810 9.072812 22.137357 0.0 2691.249139 6.832681 -1.531501 40.275965 29.517537 2656.734955 -6.073359 -1.456921 43.435157 29.595223
std 18.943940 19.741638 5.606731 0.0 3324.347981 12.631832 3.431754 14.733796 5.621444 3253.950091 11.886625 3.229787 16.021885 5.499356
min 413.500000 -36.689999 13.750000 0.0 -76.000000 -26.750000 -49.812500 12.000000 15.000000 -11.000000 -50.500000 -45.375000 13.000000 15.000000
25% 453.799988 0.060000 16.750000 0.0 0.000000 0.000000 -2.062500 32.000000 27.000000 0.000000 -10.812500 -2.000000 34.000000 27.000000
50% 468.540009 0.090000 21.959999 0.0 3.000000 0.000000 0.000000 39.000000 30.000000 0.000000 0.000000 0.000000 42.000000 30.000000
75% 485.420013 10.660000 27.820000 0.0 6012.000000 11.562500 0.000000 49.000000 33.000000 5861.000000 0.000000 0.000000 54.000000 33.000000
max 510.779999 115.239998 29.680000 0.0 14881.000000 52.000000 0.000000 76.000000 45.000000 12655.000000 32.875000 0.000000 80.000000 44.000000
No description has been provided for this image
📌 Bin Distributions info:

🔹 BatteryCurrent_A:
   - num_bins: 6
   - quantile_edges: [-36.68999862670898, -0.0900000035762786, 0.0599999986588954, 0.0900000035762786, 10.65999984741211, 34.4900016784668, 115.23999786376952]
   - quantiles_used: [0, 0.1, 0.25, 0.5, 0.75, 0.9, 1.0]

🔹 BatteryPackTemp_C:
   - num_bins: 6
   - quantile_edges: [13.75, 13.899999618530272, 16.75, 21.959999084472656, 27.81999969482422, 29.25, 29.68000030517578]
   - quantiles_used: [0, 0.1, 0.25, 0.5, 0.75, 0.9, 1.0]

🔹 BatteryVoltage_V:
   - num_bins: 6
   - quantile_edges: [413.5, 449.260009765625, 453.7999877929688, 468.5400085449219, 485.4200134277344, 495.6400146484375, 510.7799987792969]
   - quantiles_used: [0, 0.1, 0.25, 0.5, 0.75, 0.9, 1.0]

🔹 InverterSpeed_RearLeft_RPM:
   - num_bins: 6
   - quantile_edges: [-76.0, -2.0, 0.0, 3.0, 6012.0, 7515.0, 14881.0]
   - quantiles_used: [0, 0.1, 0.25, 0.5, 0.75, 0.9, 1.0]

🔹 InverterSpeed_RearRight_RPM:
   - num_bins: 6
   - quantile_edges: [-11.0, 0.0, 0.0, 0.0, 5861.0, 7448.0, 12655.0]
   - quantiles_used: [0, 0.1, 0.25, 0.5, 0.75, 0.9, 1.0]

🔹 InverterTemp_RearLeft_C:
   - num_bins: 6
   - quantile_edges: [15.0, 24.0, 27.0, 30.0, 33.0, 35.0, 45.0]
   - quantiles_used: [0, 0.1, 0.25, 0.5, 0.75, 0.9, 1.0]

🔹 InverterTemp_RearRight_C:
   - num_bins: 6
   - quantile_edges: [15.0, 24.0, 27.0, 30.0, 33.0, 35.0, 44.0]
   - quantiles_used: [0, 0.1, 0.25, 0.5, 0.75, 0.9, 1.0]

🔹 Inverter_Id_Ref_RearLeft_A:
   - num_bins: 6
   - quantile_edges: [-49.8125, -4.6875, -2.0625, 0.0, 0.0, 0.0, 0.0]
   - quantiles_used: [0, 0.1, 0.25, 0.5, 0.75, 0.9, 1.0]

🔹 Inverter_Id_Ref_RearRight_A:
   - num_bins: 6
   - quantile_edges: [-45.375, -4.3125, -2.0, 0.0, 0.0, 0.0, 0.0]
   - quantiles_used: [0, 0.1, 0.25, 0.5, 0.75, 0.9, 1.0]

🔹 Inverter_Iq_Ref_RearLeft_A:
   - num_bins: 6
   - quantile_edges: [-26.75, 0.0, 0.0, 0.0, 11.5625, 26.75, 52.0]
   - quantiles_used: [0, 0.1, 0.25, 0.5, 0.75, 0.9, 1.0]

🔹 Inverter_Iq_Ref_RearRight_A:
   - num_bins: 6
   - quantile_edges: [-50.5, -24.75, -10.8125, 0.0, 0.0, 0.125, 32.875]
   - quantiles_used: [0, 0.1, 0.25, 0.5, 0.75, 0.9, 1.0]

🔹 MotorTemp_RearLeft_C:
   - num_bins: 6
   - quantile_edges: [12.0, 16.0, 32.0, 39.0, 49.0, 61.0, 76.0]
   - quantiles_used: [0, 0.1, 0.25, 0.5, 0.75, 0.9, 1.0]

🔹 MotorTemp_RearRight_C:
   - num_bins: 6
   - quantile_edges: [13.0, 17.0, 34.0, 42.0, 54.0, 65.0, 80.0]
   - quantiles_used: [0, 0.1, 0.25, 0.5, 0.75, 0.9, 1.0]

🔷 Analyzing 20250113-imola dataset¶

In [5]:
# Select the dataset file to analyze.
# The base name is compared exactly (instead of a substring test on the whole
# path) so that a look-alike file or folder name can never be picked by mistake.
dataset_path = next(
    (file for file in datasets_imola if os.path.basename(file) == '20250113-imola.csv'),
    None
)

if dataset_path:
    print(f"📂 Loading dataset: {dataset_path}")

    # Load the dataset into a DataFrame
    imola_20250113_df = pd.read_csv(dataset_path)

    # Display basic dataset information.
    # DataFrame.info() writes its report to stdout and returns None, so it is
    # called directly; wrapping it in print() would add a stray "None" line.
    print("\n🔍 Dataset Overview:")
    imola_20250113_df.info()

    print("\n📊 First 5 Rows:")
    display(imola_20250113_df.head())

    print("\n📈 Summary Statistics:")
    display(imola_20250113_df.describe())

    # Get distribution overview with resulting bins.
    # Every column except the 'InverterFault' flag is analyzed; Index.difference
    # returns the remaining names sorted, hence the alphabetical report order.
    bins_result = DataVisualizer.plot_distributions_overview(
        df=imola_20250113_df,
        columns=imola_20250113_df.columns.difference(['InverterFault']).tolist(),
        bins='auto',
        quantiles=[0, 0.1, 0.25, 0.5, 0.75, 0.9, 1.0]
    )

    # Print bin suggestions in a formatted way
    print("\n📌 Bin Distributions info:")
    for column, stats in bins_result.items():
        print(f"\n🔹 {column}:")
        for key, value in stats.items():
            print(f"   - {key}: {value}")

else:
    print("⚠ Dataset '20250113-imola.csv' not found in the specified folder.")
📂 Loading dataset: .\data\datasets-imola\20250113-imola.csv

🔍 Dataset Overview:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 497553 entries, 0 to 497552
Data columns (total 14 columns):
 #   Column                       Non-Null Count   Dtype  
---  ------                       --------------   -----  
 0   BatteryVoltage_V             497553 non-null  float64
 1   BatteryCurrent_A             497553 non-null  float64
 2   BatteryPackTemp_C            497553 non-null  float64
 3   InverterFault                497553 non-null  int64  
 4   InverterSpeed_RearLeft_RPM   497553 non-null  float64
 5   Inverter_Iq_Ref_RearLeft_A   497553 non-null  float64
 6   Inverter_Id_Ref_RearLeft_A   497553 non-null  float64
 7   MotorTemp_RearLeft_C         497553 non-null  float64
 8   InverterTemp_RearLeft_C      497553 non-null  float64
 9   InverterSpeed_RearRight_RPM  497553 non-null  float64
 10  Inverter_Iq_Ref_RearRight_A  497553 non-null  float64
 11  Inverter_Id_Ref_RearRight_A  497553 non-null  float64
 12  MotorTemp_RearRight_C        497553 non-null  float64
 13  InverterTemp_RearRight_C     497553 non-null  float64
dtypes: float64(13), int64(1)
memory usage: 53.1 MB
None

📊 First 5 Rows:
BatteryVoltage_V BatteryCurrent_A BatteryPackTemp_C InverterFault InverterSpeed_RearLeft_RPM Inverter_Iq_Ref_RearLeft_A Inverter_Id_Ref_RearLeft_A MotorTemp_RearLeft_C InverterTemp_RearLeft_C InverterSpeed_RearRight_RPM Inverter_Iq_Ref_RearRight_A Inverter_Id_Ref_RearRight_A MotorTemp_RearRight_C InverterTemp_RearRight_C
0 502.880005 0.03 5.19 0 0.0 0.0 0.0 8.0 11.0 31.0 0.0 0.0 10.0 13.0
1 502.899994 0.06 5.19 0 0.0 0.0 0.0 8.0 11.0 31.0 0.0 0.0 10.0 13.0
2 502.899994 0.06 5.19 0 3.0 0.0 0.0 8.0 11.0 29.0 0.0 0.0 10.0 13.0
3 502.899994 0.06 5.19 0 3.0 0.0 0.0 8.0 11.0 29.0 0.0 0.0 10.0 13.0
4 502.899994 0.06 5.19 0 3.0 0.0 0.0 8.0 11.0 29.0 0.0 0.0 10.0 13.0
📈 Summary Statistics:
BatteryVoltage_V BatteryCurrent_A BatteryPackTemp_C InverterFault InverterSpeed_RearLeft_RPM Inverter_Iq_Ref_RearLeft_A Inverter_Id_Ref_RearLeft_A MotorTemp_RearLeft_C InverterTemp_RearLeft_C InverterSpeed_RearRight_RPM Inverter_Iq_Ref_RearRight_A Inverter_Id_Ref_RearRight_A MotorTemp_RearRight_C InverterTemp_RearRight_C
count 497553.000000 497553.000000 497553.000000 497553.000000 497553.000000 497553.000000 497553.000000 497553.000000 497553.000000 497553.000000 497553.000000 497553.000000 497553.000000 497553.000000
mean 463.542110 6.043057 15.061768 0.116209 2167.661799 6.023563 -1.143004 33.947387 25.935744 2133.151913 -5.894690 -1.118844 37.482093 26.616244
std 20.394413 14.703243 4.844621 0.320475 2369.976221 11.305793 2.497655 9.819891 5.290186 2304.850672 11.040044 2.452635 11.906781 5.114772
min 385.820007 -13.500000 5.190000 0.000000 -1439.000000 -7.062500 -52.937500 7.000000 0.000000 -573.000000 -50.500000 -45.937500 9.000000 0.000000
25% 448.380005 0.000000 11.490000 0.000000 0.000000 0.000000 -1.312500 29.000000 24.000000 0.000000 -7.562500 -1.312500 30.000000 24.000000
50% 463.579987 0.090000 15.730000 0.000000 1748.000000 0.000000 0.000000 33.000000 25.000000 1740.000000 0.000000 0.000000 36.000000 26.000000
75% 478.880005 4.360000 19.270000 0.000000 4075.000000 7.750000 0.000000 42.000000 28.000000 4023.000000 0.000000 0.000000 47.000000 29.000000
max 503.100006 115.790001 23.639999 1.000000 14967.000000 52.000000 0.000000 57.000000 45.000000 10113.000000 6.875000 0.000000 67.000000 46.000000
No description has been provided for this image
📌 Bin Distributions info:

🔹 BatteryCurrent_A:
   - num_bins: 6
   - quantile_edges: [-13.5, -0.3899999856948852, 0.0, 0.0900000035762786, 4.360000133514404, 20.559999465942383, 115.79000091552734]
   - quantiles_used: [0, 0.1, 0.25, 0.5, 0.75, 0.9, 1.0]

🔹 BatteryPackTemp_C:
   - num_bins: 6
   - quantile_edges: [5.190000057220459, 7.090000152587891, 11.489999771118164, 15.729999542236328, 19.270000457763672, 20.68000030517578, 23.63999938964844]
   - quantiles_used: [0, 0.1, 0.25, 0.5, 0.75, 0.9, 1.0]

🔹 BatteryVoltage_V:
   - num_bins: 6
   - quantile_edges: [385.8200073242188, 435.2200012207031, 448.3800048828125, 463.5799865722656, 478.8800048828125, 488.8599853515625, 503.1000061035156]
   - quantiles_used: [0, 0.1, 0.25, 0.5, 0.75, 0.9, 1.0]

🔹 InverterSpeed_RearLeft_RPM:
   - num_bins: 6
   - quantile_edges: [-1439.0, -2.0, 0.0, 1748.0, 4075.0, 5399.0, 14967.0]
   - quantiles_used: [0, 0.1, 0.25, 0.5, 0.75, 0.9, 1.0]

🔹 InverterSpeed_RearRight_RPM:
   - num_bins: 6
   - quantile_edges: [-573.0, 0.0, 0.0, 1740.0, 4023.0, 5301.0, 10113.0]
   - quantiles_used: [0, 0.1, 0.25, 0.5, 0.75, 0.9, 1.0]

🔹 InverterTemp_RearLeft_C:
   - num_bins: 6
   - quantile_edges: [0.0, 21.0, 24.0, 25.0, 28.0, 33.0, 45.0]
   - quantiles_used: [0, 0.1, 0.25, 0.5, 0.75, 0.9, 1.0]

🔹 InverterTemp_RearRight_C:
   - num_bins: 6
   - quantile_edges: [0.0, 22.0, 24.0, 26.0, 29.0, 34.0, 46.0]
   - quantiles_used: [0, 0.1, 0.25, 0.5, 0.75, 0.9, 1.0]

🔹 Inverter_Id_Ref_RearLeft_A:
   - num_bins: 6
   - quantile_edges: [-52.9375, -4.0, -1.3125, 0.0, 0.0, 0.0, 0.0]
   - quantiles_used: [0, 0.1, 0.25, 0.5, 0.75, 0.9, 1.0]

🔹 Inverter_Id_Ref_RearRight_A:
   - num_bins: 6
   - quantile_edges: [-45.9375, -3.9375, -1.3125, 0.0, 0.0, 0.0, 0.0]
   - quantiles_used: [0, 0.1, 0.25, 0.5, 0.75, 0.9, 1.0]

🔹 Inverter_Iq_Ref_RearLeft_A:
   - num_bins: 6
   - quantile_edges: [-7.0625, -0.125, 0.0, 0.0, 7.75, 22.8125, 52.0]
   - quantiles_used: [0, 0.1, 0.25, 0.5, 0.75, 0.9, 1.0]

🔹 Inverter_Iq_Ref_RearRight_A:
   - num_bins: 6
   - quantile_edges: [-50.5, -22.25, -7.5625, 0.0, 0.0, 0.125, 6.875]
   - quantiles_used: [0, 0.1, 0.25, 0.5, 0.75, 0.9, 1.0]

🔹 MotorTemp_RearLeft_C:
   - num_bins: 6
   - quantile_edges: [7.0, 25.0, 29.0, 33.0, 42.0, 46.0, 57.0]
   - quantiles_used: [0, 0.1, 0.25, 0.5, 0.75, 0.9, 1.0]

🔹 MotorTemp_RearRight_C:
   - num_bins: 6
   - quantile_edges: [9.0, 25.0, 30.0, 36.0, 47.0, 53.0, 67.0]
   - quantiles_used: [0, 0.1, 0.25, 0.5, 0.75, 0.9, 1.0]

🔷 Analyzing 20250114-imola dataset¶

In [6]:
# Select the dataset file to analyze.
# The base name is compared exactly (instead of a substring test on the whole
# path) so that a look-alike file or folder name can never be picked by mistake.
dataset_path = next(
    (file for file in datasets_imola if os.path.basename(file) == '20250114-imola.csv'),
    None
)

if dataset_path:
    print(f"📂 Loading dataset: {dataset_path}")

    # Load the dataset into a DataFrame
    imola_20250114_df = pd.read_csv(dataset_path)

    # Display basic dataset information.
    # DataFrame.info() writes its report to stdout and returns None, so it is
    # called directly; wrapping it in print() would add a stray "None" line.
    print("\n🔍 Dataset Overview:")
    imola_20250114_df.info()

    print("\n📊 First 5 Rows:")
    display(imola_20250114_df.head())

    print("\n📈 Summary Statistics:")
    display(imola_20250114_df.describe())

    # Get distribution overview with resulting bins.
    # Every column except the 'InverterFault' flag is analyzed; Index.difference
    # returns the remaining names sorted, hence the alphabetical report order.
    bins_result = DataVisualizer.plot_distributions_overview(
        df=imola_20250114_df,
        columns=imola_20250114_df.columns.difference(['InverterFault']).tolist(),
        bins='auto',
        quantiles=[0, 0.1, 0.25, 0.5, 0.75, 0.9, 1.0]
    )

    # Print bin suggestions in a formatted way
    print("\n📌 Bin Distributions info:")
    for column, stats in bins_result.items():
        print(f"\n🔹 {column}:")
        for key, value in stats.items():
            print(f"   - {key}: {value}")

else:
    print("⚠ Dataset '20250114-imola.csv' not found in the specified folder.")
📂 Loading dataset: .\data\datasets-imola\20250114-imola.csv

🔍 Dataset Overview:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 603724 entries, 0 to 603723
Data columns (total 14 columns):
 #   Column                       Non-Null Count   Dtype  
---  ------                       --------------   -----  
 0   BatteryVoltage_V             603724 non-null  float64
 1   BatteryCurrent_A             603724 non-null  float64
 2   BatteryPackTemp_C            603724 non-null  float64
 3   InverterFault                603724 non-null  int64  
 4   InverterSpeed_RearLeft_RPM   603724 non-null  float64
 5   Inverter_Iq_Ref_RearLeft_A   603724 non-null  float64
 6   Inverter_Id_Ref_RearLeft_A   603724 non-null  float64
 7   MotorTemp_RearLeft_C         603724 non-null  float64
 8   InverterTemp_RearLeft_C      603724 non-null  float64
 9   InverterSpeed_RearRight_RPM  603724 non-null  float64
 10  Inverter_Iq_Ref_RearRight_A  603724 non-null  float64
 11  Inverter_Id_Ref_RearRight_A  603724 non-null  float64
 12  MotorTemp_RearRight_C        603724 non-null  float64
 13  InverterTemp_RearRight_C     603724 non-null  float64
dtypes: float64(13), int64(1)
memory usage: 64.5 MB
None

📊 First 5 Rows:
BatteryVoltage_V BatteryCurrent_A BatteryPackTemp_C InverterFault InverterSpeed_RearLeft_RPM Inverter_Iq_Ref_RearLeft_A Inverter_Id_Ref_RearLeft_A MotorTemp_RearLeft_C InverterTemp_RearLeft_C InverterSpeed_RearRight_RPM Inverter_Iq_Ref_RearRight_A Inverter_Id_Ref_RearRight_A MotorTemp_RearRight_C InverterTemp_RearRight_C
0 500.220001 0.03 17.120001 0 0.0 0.0 0.0 9.0 10.0 0.0 0.0 0.0 10.0 10.0
1 500.220001 0.03 17.120001 0 0.0 0.0 0.0 9.0 10.0 0.0 0.0 0.0 10.0 10.0
2 500.220001 0.03 17.120001 0 -2.0 0.0 0.0 9.0 10.0 0.0 0.0 0.0 10.0 10.0
3 500.220001 0.03 17.120001 0 -2.0 0.0 0.0 9.0 10.0 0.0 0.0 0.0 10.0 10.0
4 500.220001 0.03 17.120001 0 -2.0 0.0 0.0 9.0 10.0 0.0 0.0 0.0 10.0 10.0
📈 Summary Statistics:
BatteryVoltage_V BatteryCurrent_A BatteryPackTemp_C InverterFault InverterSpeed_RearLeft_RPM Inverter_Iq_Ref_RearLeft_A Inverter_Id_Ref_RearLeft_A MotorTemp_RearLeft_C InverterTemp_RearLeft_C InverterSpeed_RearRight_RPM Inverter_Iq_Ref_RearRight_A Inverter_Id_Ref_RearRight_A MotorTemp_RearRight_C InverterTemp_RearRight_C
count 603724.000000 603724.000000 603724.000000 603724.000000 603724.000000 603724.000000 603724.000000 603724.000000 603724.000000 603724.000000 603724.000000 603724.000000 603724.000000 603724.000000
mean 475.379427 5.326524 26.169941 0.125693 1833.484712 5.054590 -0.956816 31.860244 26.206788 1802.161027 -4.978979 -0.954104 35.229350 26.970097
std 16.640589 14.433446 5.712431 0.331504 2463.637554 11.118464 2.350098 11.030861 5.909046 2405.234361 10.923923 2.402517 12.961616 6.125237
min 405.399994 -14.930000 15.100000 0.000000 -326.000000 -7.625000 -52.937500 4.000000 0.000000 -239.000000 -50.375000 -46.125000 4.000000 0.000000
25% 460.880005 0.030000 18.950001 0.000000 0.000000 0.000000 -0.437500 27.000000 23.000000 0.000000 -2.062500 -0.437500 29.000000 24.000000
50% 478.940002 0.060000 28.540001 0.000000 3.000000 0.000000 0.000000 31.000000 26.000000 0.000000 0.000000 0.000000 34.000000 27.000000
75% 488.459991 0.700000 30.730000 0.000000 3873.000000 1.875000 0.000000 38.000000 30.000000 3841.000000 0.000000 0.000000 42.000000 31.000000
max 503.260010 116.339996 33.150002 1.000000 15312.000000 51.875000 0.000000 66.000000 45.000000 11698.000000 7.375000 0.000000 77.000000 46.000000
No description has been provided for this image
📌 Bin Distributions info:

🔹 BatteryCurrent_A:
   - num_bins: 6
   - quantile_edges: [-14.93000030517578, -0.3300000131130218, 0.0299999993294477, 0.0599999986588954, 0.699999988079071, 20.040000915527344, 116.33999633789062]
   - quantiles_used: [0, 0.1, 0.25, 0.5, 0.75, 0.9, 1.0]

🔹 BatteryPackTemp_C:
   - num_bins: 6
   - quantile_edges: [15.100000381469728, 16.969999313354492, 18.950000762939453, 28.540000915527344, 30.729999542236328, 31.200000762939453, 33.150001525878906]
   - quantiles_used: [0, 0.1, 0.25, 0.5, 0.75, 0.9, 1.0]

🔹 BatteryVoltage_V:
   - num_bins: 6
   - quantile_edges: [405.3999938964844, 454.8999938964844, 460.8800048828125, 478.9400024414063, 488.4599914550781, 494.3599853515625, 503.260009765625]
   - quantiles_used: [0, 0.1, 0.25, 0.5, 0.75, 0.9, 1.0]

🔹 InverterSpeed_RearLeft_RPM:
   - num_bins: 6
   - quantile_edges: [-326.0, -2.0, 0.0, 3.0, 3873.0, 5571.0, 15312.0]
   - quantiles_used: [0, 0.1, 0.25, 0.5, 0.75, 0.9, 1.0]

🔹 InverterSpeed_RearRight_RPM:
   - num_bins: 6
   - quantile_edges: [-239.0, 0.0, 0.0, 0.0, 3841.0, 5446.0, 11698.0]
   - quantiles_used: [0, 0.1, 0.25, 0.5, 0.75, 0.9, 1.0]

🔹 InverterTemp_RearLeft_C:
   - num_bins: 6
   - quantile_edges: [0.0, 20.0, 23.0, 26.0, 30.0, 34.0, 45.0]
   - quantiles_used: [0, 0.1, 0.25, 0.5, 0.75, 0.9, 1.0]

🔹 InverterTemp_RearRight_C:
   - num_bins: 6
   - quantile_edges: [0.0, 21.0, 24.0, 27.0, 31.0, 35.0, 46.0]
   - quantiles_used: [0, 0.1, 0.25, 0.5, 0.75, 0.9, 1.0]

🔹 Inverter_Id_Ref_RearLeft_A:
   - num_bins: 6
   - quantile_edges: [-52.9375, -3.875, -0.4375, 0.0, 0.0, 0.0, 0.0]
   - quantiles_used: [0, 0.1, 0.25, 0.5, 0.75, 0.9, 1.0]

🔹 Inverter_Id_Ref_RearRight_A:
   - num_bins: 6
   - quantile_edges: [-46.125, -3.8125, -0.4375, 0.0, 0.0, 0.0, 0.0]
   - quantiles_used: [0, 0.1, 0.25, 0.5, 0.75, 0.9, 1.0]

🔹 Inverter_Iq_Ref_RearLeft_A:
   - num_bins: 6
   - quantile_edges: [-7.625, 0.0, 0.0, 0.0, 1.875, 22.25, 51.875]
   - quantiles_used: [0, 0.1, 0.25, 0.5, 0.75, 0.9, 1.0]

🔹 Inverter_Iq_Ref_RearRight_A:
   - num_bins: 6
   - quantile_edges: [-50.375, -21.875, -2.0625, 0.0, 0.0, 0.0, 7.375]
   - quantiles_used: [0, 0.1, 0.25, 0.5, 0.75, 0.9, 1.0]

🔹 MotorTemp_RearLeft_C:
   - num_bins: 6
   - quantile_edges: [4.0, 19.0, 27.0, 31.0, 38.0, 45.0, 66.0]
   - quantiles_used: [0, 0.1, 0.25, 0.5, 0.75, 0.9, 1.0]

🔹 MotorTemp_RearRight_C:
   - num_bins: 6
   - quantile_edges: [4.0, 19.0, 29.0, 34.0, 42.0, 51.0, 77.0]
   - quantiles_used: [0, 0.1, 0.25, 0.5, 0.75, 0.9, 1.0]

🪓 Manual discretization with cut method¶

Define the bin intervals and labels for each feature

In [7]:
# Per-column discretization settings.
# Each entry maps a column name to its cut points ('bins', open-ended on both
# sides via -inf / +inf) and to the names of the three resulting intervals ('labels').
discretizion_columns_configs = {
    # --- Battery ---
    'BatteryCurrent_A': dict(bins=[-np.inf, 0.09, 20., np.inf], labels=['0_Idle', '1_Normal', '2_High']),
    'BatteryPackTemp_C': dict(bins=[-np.inf, 16., 28., np.inf], labels=['0_Low', '1_Normal', '2_High']),
    'BatteryVoltage_V': dict(bins=[-np.inf, 450., 480., np.inf], labels=['0_Low', '1_Medium', '2_High']),
    # --- Inverter speed ---
    'InverterSpeed_RearLeft_RPM': dict(bins=[-np.inf, 0., 4000., np.inf], labels=['0_Idle', '1_Normal_Pace', '2_High_Pace']),
    'InverterSpeed_RearRight_RPM': dict(bins=[-np.inf, 0., 4000., np.inf], labels=['0_Idle', '1_Normal_Pace', '2_High_Pace']),
    # --- Inverter temperature ---
    'InverterTemp_RearLeft_C': dict(bins=[-np.inf, 20., 30., np.inf], labels=['0_Low', '1_Normal', '2_High']),
    'InverterTemp_RearRight_C': dict(bins=[-np.inf, 20., 30., np.inf], labels=['0_Low', '1_Normal', '2_High']),
    # --- Inverter Id reference current ---
    'Inverter_Id_Ref_RearLeft_A': dict(bins=[-np.inf, -4., -1., np.inf], labels=['0_Very_Negative', '1_Negative', '2_Close_Zero']),
    'Inverter_Id_Ref_RearRight_A': dict(bins=[-np.inf, -4., -1., np.inf], labels=['0_Very_Negative', '1_Negative', '2_Close_Zero']),
    # --- Inverter Iq reference current ---
    'Inverter_Iq_Ref_RearRight_A': dict(bins=[-np.inf, -4, 4, np.inf], labels=['0_Negative', '1_Close_Zero', '2_Positive']),
    'Inverter_Iq_Ref_RearLeft_A': dict(bins=[-np.inf, -4, 4, np.inf], labels=['0_Negative', '1_Close_Zero', '2_Positive']),
    # --- Motor temperature ---
    'MotorTemp_RearLeft_C': dict(bins=[-np.inf, 25., 55., np.inf], labels=['0_Low', '1_Normal', '2_High']),
    'MotorTemp_RearRight_C': dict(bins=[-np.inf, 25., 55., np.inf], labels=['0_Low', '1_Normal', '2_High']),
}

🔷 Discretizing 20241128-imola dataset with cut¶

In [8]:
# Discretize the 20241128 session with the 'cut' method, driven by the per-column
# bin/label configuration; the call yields a per-column report and the new frame.
discretization_info, discretized_imola_20241128_df = DataDiscretizer.discretize_columns(
    df=imola_20241128_df, columns_config=discretizion_columns_configs, method='cut'
)

# Report how each column was binned and how its rows are spread over the bins
print("\n📊 Discretization Overview:")
for column, info in discretization_info.items():
    print(
        f"\n🔹 Column: {column}",
        f"   - Method: {info['method']}",
        f"   - Bin Edges: {info['bin_edges']}",
        f"   - Number of Bins: {info['bin_count']}",
        "   - Value Counts:",
        info['value_counts'].to_string(),
        sep="\n",
    )
📊 Discretization Overview:

🔹 Column: BatteryCurrent_A
   - Method: Threshold-based
   - Bin Edges: [ -inf  0.09 20.     inf]
   - Number of Bins: 3
   - Value Counts:
BatteryCurrent_A
0_Idle      103222
1_Normal     99478
2_High       45748

🔹 Column: BatteryPackTemp_C
   - Method: Threshold-based
   - Bin Edges: [-inf  16.  28.  inf]
   - Number of Bins: 3
   - Value Counts:
BatteryPackTemp_C
0_Low        44102
1_Normal    143430
2_High       60916

🔹 Column: BatteryVoltage_V
   - Method: Threshold-based
   - Bin Edges: [-inf 450. 480.  inf]
   - Number of Bins: 3
   - Value Counts:
BatteryVoltage_V
0_Low        27047
1_Medium    139398
2_High       82003

🔹 Column: InverterSpeed_RearLeft_RPM
   - Method: Threshold-based
   - Bin Edges: [ -inf    0. 4000.   inf]
   - Number of Bins: 3
   - Value Counts:
InverterSpeed_RearLeft_RPM
0_Idle           89932
1_Normal_Pace    65861
2_High_Pace      92655

🔹 Column: InverterSpeed_RearRight_RPM
   - Method: Threshold-based
   - Bin Edges: [ -inf    0. 4000.   inf]
   - Number of Bins: 3
   - Value Counts:
InverterSpeed_RearRight_RPM
0_Idle           130287
1_Normal_Pace     24379
2_High_Pace       93782

🔹 Column: InverterTemp_RearLeft_C
   - Method: Threshold-based
   - Bin Edges: [-inf  20.  30.  inf]
   - Number of Bins: 3
   - Value Counts:
InverterTemp_RearLeft_C
0_Low        21543
1_Normal    108051
2_High      118854

🔹 Column: InverterTemp_RearRight_C
   - Method: Threshold-based
   - Bin Edges: [-inf  20.  30.  inf]
   - Number of Bins: 3
   - Value Counts:
InverterTemp_RearRight_C
0_Low        21442
1_Normal    104747
2_High      122259

🔹 Column: Inverter_Id_Ref_RearLeft_A
   - Method: Threshold-based
   - Bin Edges: [-inf  -4.  -1.  inf]
   - Number of Bins: 3
   - Value Counts:
Inverter_Id_Ref_RearLeft_A
0_Very_Negative     33794
1_Negative          48612
2_Close_Zero       166042

🔹 Column: Inverter_Id_Ref_RearRight_A
   - Method: Threshold-based
   - Bin Edges: [-inf  -4.  -1.  inf]
   - Number of Bins: 3
   - Value Counts:
Inverter_Id_Ref_RearRight_A
0_Very_Negative     29745
1_Negative          55072
2_Close_Zero       163631

🔹 Column: Inverter_Iq_Ref_RearRight_A
   - Method: Threshold-based
   - Bin Edges: [-inf  -4.   4.  inf]
   - Number of Bins: 3
   - Value Counts:
Inverter_Iq_Ref_RearRight_A
0_Negative       78422
1_Close_Zero    157497
2_Positive       12529

🔹 Column: Inverter_Iq_Ref_RearLeft_A
   - Method: Threshold-based
   - Bin Edges: [-inf  -4.   4.  inf]
   - Number of Bins: 3
   - Value Counts:
Inverter_Iq_Ref_RearLeft_A
0_Negative        8578
1_Close_Zero    159569
2_Positive       80301

🔹 Column: MotorTemp_RearLeft_C
   - Method: Threshold-based
   - Bin Edges: [-inf  25.  55.  inf]
   - Number of Bins: 3
   - Value Counts:
MotorTemp_RearLeft_C
0_Low        31242
1_Normal    178111
2_High       39095

🔹 Column: MotorTemp_RearRight_C
   - Method: Threshold-based
   - Bin Edges: [-inf  25.  55.  inf]
   - Number of Bins: 3
   - Value Counts:
MotorTemp_RearRight_C
0_Low        29742
1_Normal    162912
2_High       55794
In [9]:
# Binarize the 'InverterFault' column of the already-discretized frame using a
# 0.5 threshold; the result replaces the discretized frame under the same name.
# NOTE(review): which side of the threshold maps to True is decided inside
# DataDiscretizer.binarize_columns — confirm there.
binarization_info, discretized_imola_20241128_df = DataDiscretizer.binarize_columns(
    df=discretized_imola_20241128_df, columns=['InverterFault'], thresholds={'InverterFault': 0.5}
)

# Report the threshold, the share of True values and the counts per column
print("\n📊 Binarization Overview:")
for column, info in binarization_info.items():
    print(
        f"\n🔹 Column: {column}",
        f"   - Threshold: {info['threshold']}",
        f"   - True (%): {info['percentage_true']}",
        "   - Value Counts:",
        info['value_counts'].to_string(),
        sep="\n",
    )
📊 Binarization Overview:

🔹 Column: InverterFault
   - Threshold: 0.5
   - True (%): 0.0
   - Value Counts:
InverterFault
False    248448
In [10]:
# Preview the discretized and binarized frame: a heading, then its top five rows
print("\n📋 Sample of Discretized Data:")
display(discretized_imola_20241128_df.head(n=5))
📋 Sample of Discretized Data:
BatteryVoltage_V BatteryCurrent_A BatteryPackTemp_C InverterFault InverterSpeed_RearLeft_RPM Inverter_Iq_Ref_RearLeft_A Inverter_Id_Ref_RearLeft_A MotorTemp_RearLeft_C InverterTemp_RearLeft_C InverterSpeed_RearRight_RPM Inverter_Iq_Ref_RearRight_A Inverter_Id_Ref_RearRight_A MotorTemp_RearRight_C InverterTemp_RearRight_C
0 2_High 0_Idle 0_Low False 0_Idle 1_Close_Zero 2_Close_Zero 0_Low 0_Low 0_Idle 1_Close_Zero 2_Close_Zero 0_Low 0_Low
1 2_High 0_Idle 0_Low False 0_Idle 1_Close_Zero 2_Close_Zero 0_Low 0_Low 0_Idle 1_Close_Zero 2_Close_Zero 0_Low 0_Low
2 2_High 0_Idle 0_Low False 0_Idle 1_Close_Zero 2_Close_Zero 0_Low 0_Low 0_Idle 1_Close_Zero 2_Close_Zero 0_Low 0_Low
3 2_High 0_Idle 0_Low False 0_Idle 1_Close_Zero 2_Close_Zero 0_Low 0_Low 0_Idle 1_Close_Zero 2_Close_Zero 0_Low 0_Low
4 2_High 0_Idle 0_Low False 0_Idle 1_Close_Zero 2_Close_Zero 0_Low 0_Low 0_Idle 1_Close_Zero 2_Close_Zero 0_Low 0_Low
In [11]:
# Encode the categorical columns of the discretized frame with the 'ordinal'
# strategy; the second return value presumably describes the applied mapping.
encoded_imola20241128_df, encoding_info = DataEncoder.encode_categorical_columns(
    encoding_strategy='ordinal', df=discretized_imola_20241128_df
)

# Plot the distribution of every discrete column, with 'InverterFault' as the fault column
DataVisualizer.plot_discrete_distributions(fault_col='InverterFault', df=encoded_imola20241128_df)
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
No description has been provided for this image
In [12]:
# Destination CSV for the discretized 20241128 dataset
output_file = os.path.join(
    DISCRETIZED_ORIGINAL_DATASETS_IMOLA_FOLDER_NAME,
    'discr-20241128-imola.csv',
)

# Write the discretized frame (not the ordinal-encoded one) to disk, then echo the path
DataProcessor.save_dataset(
    discretized_imola_20241128_df,
    output_file,
    file_format="csv",
)
print(f"✅ Saved discretized data to: {output_file}")
Dataset with shape (248448, 14), saved successfully at .\data\discretized-datasets-imola\discr-20241128-imola.csv (csv).
✅ Saved discretized data to: .\data\discretized-datasets-imola\discr-20241128-imola.csv

🔷 Discretizing 20250113-imola dataset with cut¶

In [13]:
# Discretize the 20250113 frame with the 'cut' method, using the predefined
# per-column bin configurations
discretization_info, discretized_imola_20250113_df = DataDiscretizer.discretize_columns(
    df=imola_20250113_df,
    columns_config=discretizion_columns_configs,
    method='cut',
)

# Per-column report: method, bin edges, bin count and how many rows fell in each bin
print("\n📊 Discretization Overview:")
for column, info in discretization_info.items():
    # One print call per column; sep="\n" puts each field on its own line
    print(
        f"\n🔹 Column: {column}",
        f"   - Method: {info['method']}",
        f"   - Bin Edges: {info['bin_edges']}",
        f"   - Number of Bins: {info['bin_count']}",
        "   - Value Counts:",
        info['value_counts'].to_string(),
        sep="\n",
    )
📊 Discretization Overview:

🔹 Column: BatteryCurrent_A
   - Method: Threshold-based
   - Bin Edges: [ -inf  0.09 20.     inf]
   - Number of Bins: 3
   - Value Counts:
BatteryCurrent_A
0_Idle      227235
1_Normal    219245
2_High       51073

🔹 Column: BatteryPackTemp_C
   - Method: Threshold-based
   - Bin Edges: [-inf  16.  28.  inf]
   - Number of Bins: 3
   - Value Counts:
BatteryPackTemp_C
0_Low       268152
1_Normal    229401
2_High           0

🔹 Column: BatteryVoltage_V
   - Method: Threshold-based
   - Bin Edges: [-inf 450. 480.  inf]
   - Number of Bins: 3
   - Value Counts:
BatteryVoltage_V
0_Low       141034
1_Medium    244765
2_High      111754

🔹 Column: InverterSpeed_RearLeft_RPM
   - Method: Threshold-based
   - Bin Edges: [ -inf    0. 4000.   inf]
   - Number of Bins: 3
   - Value Counts:
InverterSpeed_RearLeft_RPM
0_Idle           147953
1_Normal_Pace    220363
2_High_Pace      129237

🔹 Column: InverterSpeed_RearRight_RPM
   - Method: Threshold-based
   - Bin Edges: [ -inf    0. 4000.   inf]
   - Number of Bins: 3
   - Value Counts:
InverterSpeed_RearRight_RPM
0_Idle           213562
1_Normal_Pace    158162
2_High_Pace      125829

🔹 Column: InverterTemp_RearLeft_C
   - Method: Threshold-based
   - Bin Edges: [-inf  20.  30.  inf]
   - Number of Bins: 3
   - Value Counts:
InverterTemp_RearLeft_C
0_Low        41664
1_Normal    369706
2_High       86183

🔹 Column: InverterTemp_RearRight_C
   - Method: Threshold-based
   - Bin Edges: [-inf  20.  30.  inf]
   - Number of Bins: 3
   - Value Counts:
InverterTemp_RearRight_C
0_Low        32166
1_Normal    369644
2_High       95743

🔹 Column: Inverter_Id_Ref_RearLeft_A
   - Method: Threshold-based
   - Bin Edges: [-inf  -4.  -1.  inf]
   - Number of Bins: 3
   - Value Counts:
Inverter_Id_Ref_RearLeft_A
0_Very_Negative     50226
1_Negative          90760
2_Close_Zero       356567

🔹 Column: Inverter_Id_Ref_RearRight_A
   - Method: Threshold-based
   - Bin Edges: [-inf  -4.  -1.  inf]
   - Number of Bins: 3
   - Value Counts:
Inverter_Id_Ref_RearRight_A
0_Very_Negative     48531
1_Negative          92083
2_Close_Zero       356939

🔹 Column: Inverter_Iq_Ref_RearRight_A
   - Method: Threshold-based
   - Bin Edges: [-inf  -4.   4.  inf]
   - Number of Bins: 3
   - Value Counts:
Inverter_Iq_Ref_RearRight_A
0_Negative      155024
1_Close_Zero    341904
2_Positive         625

🔹 Column: Inverter_Iq_Ref_RearLeft_A
   - Method: Threshold-based
   - Bin Edges: [-inf  -4.   4.  inf]
   - Number of Bins: 3
   - Value Counts:
Inverter_Iq_Ref_RearLeft_A
0_Negative         751
1_Close_Zero    341385
2_Positive      155417

🔹 Column: MotorTemp_RearLeft_C
   - Method: Threshold-based
   - Bin Edges: [-inf  25.  55.  inf]
   - Number of Bins: 3
   - Value Counts:
MotorTemp_RearLeft_C
0_Low        55209
1_Normal    440648
2_High        1696

🔹 Column: MotorTemp_RearRight_C
   - Method: Threshold-based
   - Bin Edges: [-inf  25.  55.  inf]
   - Number of Bins: 3
   - Value Counts:
MotorTemp_RearRight_C
0_Low        50555
1_Normal    412212
2_High       34786
In [14]:
# Binarize the 'InverterFault' column of the already-discretized 20250113 frame
# with a 0.5 threshold. The result is stored back under the same name, so the
# following cells see the binarized fault column.
binarization_info, discretized_imola_20250113_df = DataDiscretizer.binarize_columns(
    df=discretized_imola_20250113_df,
    columns=['InverterFault'],
    thresholds={'InverterFault': 0.5},
)

# Per-column report: threshold used, share of True values and the value counts
print("\n📊 Binarization Overview:")
for column, info in binarization_info.items():
    # One print call per column; sep="\n" puts each field on its own line
    print(
        f"\n🔹 Column: {column}",
        f"   - Threshold: {info['threshold']}",
        f"   - True (%): {info['percentage_true']}",
        "   - Value Counts:",
        info['value_counts'].to_string(),
        sep="\n",
    )
📊 Binarization Overview:

🔹 Column: InverterFault
   - Threshold: 0.5
   - True (%): 11.620872550260977
   - Value Counts:
InverterFault
False    439733
True      57820
In [15]:
# Preview the first five rows of the discretized 20250113 frame
print("\n📋 Sample of Discretized Data:")
display(discretized_imola_20250113_df.head(n=5))
📋 Sample of Discretized Data:
BatteryVoltage_V BatteryCurrent_A BatteryPackTemp_C InverterFault InverterSpeed_RearLeft_RPM Inverter_Iq_Ref_RearLeft_A Inverter_Id_Ref_RearLeft_A MotorTemp_RearLeft_C InverterTemp_RearLeft_C InverterSpeed_RearRight_RPM Inverter_Iq_Ref_RearRight_A Inverter_Id_Ref_RearRight_A MotorTemp_RearRight_C InverterTemp_RearRight_C
0 2_High 0_Idle 0_Low False 0_Idle 1_Close_Zero 2_Close_Zero 0_Low 0_Low 1_Normal_Pace 1_Close_Zero 2_Close_Zero 0_Low 0_Low
1 2_High 0_Idle 0_Low False 0_Idle 1_Close_Zero 2_Close_Zero 0_Low 0_Low 1_Normal_Pace 1_Close_Zero 2_Close_Zero 0_Low 0_Low
2 2_High 0_Idle 0_Low False 1_Normal_Pace 1_Close_Zero 2_Close_Zero 0_Low 0_Low 1_Normal_Pace 1_Close_Zero 2_Close_Zero 0_Low 0_Low
3 2_High 0_Idle 0_Low False 1_Normal_Pace 1_Close_Zero 2_Close_Zero 0_Low 0_Low 1_Normal_Pace 1_Close_Zero 2_Close_Zero 0_Low 0_Low
4 2_High 0_Idle 0_Low False 1_Normal_Pace 1_Close_Zero 2_Close_Zero 0_Low 0_Low 1_Normal_Pace 1_Close_Zero 2_Close_Zero 0_Low 0_Low
In [16]:
# Ordinal-encode the discretized 20250113 frame; the encoder's second return
# value is not needed here and is discarded via `_`
encoded_imola_20250113_df, _ = DataEncoder.encode_categorical_columns(
    df=discretized_imola_20250113_df, encoding_strategy='ordinal'
)

# Plot the encoded discrete variables, with 'InverterFault' passed as the fault column
DataVisualizer.plot_discrete_distributions(
    df=encoded_imola_20250113_df, fault_col='InverterFault'
)
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
No description has been provided for this image
In [17]:
# Destination of the discretized 20250113 Imola dataset, located inside the
# discretized-datasets folder configured at the top of the notebook.
output_file = os.path.join(
    DISCRETIZED_ORIGINAL_DATASETS_IMOLA_FOLDER_NAME,
    'discr-20250113-imola.csv',
)

# Persist the discretized frame as CSV through the project helper, then echo
# the destination so the cell output records where the file went.
DataProcessor.save_dataset(
    discretized_imola_20250113_df,
    output_file,
    file_format="csv",
)
print(f"✅ Saved discretized data to: {output_file}")
Dataset with shape (497553, 14), saved successfully at .\data\discretized-datasets-imola\discr-20250113-imola.csv (csv).
✅ Saved discretized data to: .\data\discretized-datasets-imola\discr-20250113-imola.csv

🔷 Discretizing 20250114-imola dataset with cut¶

In [18]:
# Discretize the configured columns of the 20250114 dataset with the 'cut'
# method. The helper returns per-column metadata together with the
# discretized frame.
discretization_info, discretized_imola_20250114_df = DataDiscretizer.discretize_columns(
    method='cut',
    columns_config=discretizion_columns_configs,
    df=imola_20250114_df,
)

# Report, for every discretized column, the method used, the bin edges, the
# number of bins and how many rows fell into each bin.
print("\n📊 Discretization Overview:")
for column, info in discretization_info.items():
    # One print call per column; sep="\n" yields the same line-by-line output
    # as printing each entry separately.
    print(
        f"\n🔹 Column: {column}",
        f"   - Method: {info['method']}",
        f"   - Bin Edges: {info['bin_edges']}",
        f"   - Number of Bins: {info['bin_count']}",
        "   - Value Counts:",
        info['value_counts'].to_string(),
        sep="\n",
    )
📊 Discretization Overview:

🔹 Column: BatteryCurrent_A
   - Method: Threshold-based
   - Bin Edges: [ -inf  0.09 20.     inf]
   - Number of Bins: 3
   - Value Counts:
BatteryCurrent_A
0_Idle      365956
1_Normal    177326
2_High       60442

🔹 Column: BatteryPackTemp_C
   - Method: Threshold-based
   - Bin Edges: [-inf  16.  28.  inf]
   - Number of Bins: 3
   - Value Counts:
BatteryPackTemp_C
0_Low        21071
1_Normal    240217
2_High      342436

🔹 Column: BatteryVoltage_V
   - Method: Threshold-based
   - Bin Edges: [-inf 450. 480.  inf]
   - Number of Bins: 3
   - Value Counts:
BatteryVoltage_V
0_Low        48085
1_Medium    266979
2_High      288660

🔹 Column: InverterSpeed_RearLeft_RPM
   - Method: Threshold-based
   - Bin Edges: [ -inf    0. 4000.   inf]
   - Number of Bins: 3
   - Value Counts:
InverterSpeed_RearLeft_RPM
0_Idle           220654
1_Normal_Pace    239224
2_High_Pace      143846

🔹 Column: InverterSpeed_RearRight_RPM
   - Method: Threshold-based
   - Bin Edges: [ -inf    0. 4000.   inf]
   - Number of Bins: 3
   - Value Counts:
InverterSpeed_RearRight_RPM
0_Idle           322972
1_Normal_Pace    139467
2_High_Pace      141285

🔹 Column: InverterTemp_RearLeft_C
   - Method: Threshold-based
   - Bin Edges: [-inf  20.  30.  inf]
   - Number of Bins: 3
   - Value Counts:
InverterTemp_RearLeft_C
0_Low        67147
1_Normal    402530
2_High      134047

🔹 Column: InverterTemp_RearRight_C
   - Method: Threshold-based
   - Bin Edges: [-inf  20.  30.  inf]
   - Number of Bins: 3
   - Value Counts:
InverterTemp_RearRight_C
0_Low        56755
1_Normal    391176
2_High      155793

🔹 Column: Inverter_Id_Ref_RearLeft_A
   - Method: Threshold-based
   - Bin Edges: [-inf  -4.  -1.  inf]
   - Number of Bins: 3
   - Value Counts:
Inverter_Id_Ref_RearLeft_A
0_Very_Negative     58818
1_Negative          72005
2_Close_Zero       472901

🔹 Column: Inverter_Id_Ref_RearRight_A
   - Method: Threshold-based
   - Bin Edges: [-inf  -4.  -1.  inf]
   - Number of Bins: 3
   - Value Counts:
Inverter_Id_Ref_RearRight_A
0_Very_Negative     57328
1_Negative          73730
2_Close_Zero       472666

🔹 Column: Inverter_Iq_Ref_RearRight_A
   - Method: Threshold-based
   - Bin Edges: [-inf  -4.   4.  inf]
   - Number of Bins: 3
   - Value Counts:
Inverter_Iq_Ref_RearRight_A
0_Negative      140283
1_Close_Zero    462711
2_Positive         730

🔹 Column: Inverter_Iq_Ref_RearLeft_A
   - Method: Threshold-based
   - Bin Edges: [-inf  -4.   4.  inf]
   - Number of Bins: 3
   - Value Counts:
Inverter_Iq_Ref_RearLeft_A
0_Negative         823
1_Close_Zero    462841
2_Positive      140060

🔹 Column: MotorTemp_RearLeft_C
   - Method: Threshold-based
   - Bin Edges: [-inf  25.  55.  inf]
   - Number of Bins: 3
   - Value Counts:
MotorTemp_RearLeft_C
0_Low       112472
1_Normal    469609
2_High       21643

🔹 Column: MotorTemp_RearRight_C
   - Method: Threshold-based
   - Bin Edges: [-inf  25.  55.  inf]
   - Number of Bins: 3
   - Value Counts:
MotorTemp_RearRight_C
0_Low        84316
1_Normal    483568
2_High       35840
In [19]:
# Binarize the 'InverterFault' column of the already-discretized 20250114
# frame, passing 0.5 as its threshold. The helper returns per-column metadata
# plus the updated frame, which is rebound to the same name because the
# following cells keep reading `discretized_imola_20250114_df`.
binarization_info, discretized_imola_20250114_df = DataDiscretizer.binarize_columns(
    thresholds={'InverterFault': 0.5},
    columns=['InverterFault'],
    df=discretized_imola_20250114_df,
)

# Report, for every binarized column, the threshold, the share of True values
# and the True/False counts.
print("\n📊 Binarization Overview:")
for column, info in binarization_info.items():
    # One print call per column; sep="\n" yields the same line-by-line output
    # as printing each entry separately.
    print(
        f"\n🔹 Column: {column}",
        f"   - Threshold: {info['threshold']}",
        f"   - True (%): {info['percentage_true']}",
        "   - Value Counts:",
        info['value_counts'].to_string(),
        sep="\n",
    )
📊 Binarization Overview:

🔹 Column: InverterFault
   - Threshold: 0.5
   - True (%): 12.56931975538491
   - Value Counts:
InverterFault
False    527840
True      75884
In [20]:
# Preview the top five rows of the discretized 20250114 frame (five is the
# default row count of `head`, spelled out here for the reader).
print("\n📋 Sample of Discretized Data:")
display(discretized_imola_20250114_df.head(n=5))
📋 Sample of Discretized Data:
BatteryVoltage_V BatteryCurrent_A BatteryPackTemp_C InverterFault InverterSpeed_RearLeft_RPM Inverter_Iq_Ref_RearLeft_A Inverter_Id_Ref_RearLeft_A MotorTemp_RearLeft_C InverterTemp_RearLeft_C InverterSpeed_RearRight_RPM Inverter_Iq_Ref_RearRight_A Inverter_Id_Ref_RearRight_A MotorTemp_RearRight_C InverterTemp_RearRight_C
0 2_High 0_Idle 1_Normal False 0_Idle 1_Close_Zero 2_Close_Zero 0_Low 0_Low 0_Idle 1_Close_Zero 2_Close_Zero 0_Low 0_Low
1 2_High 0_Idle 1_Normal False 0_Idle 1_Close_Zero 2_Close_Zero 0_Low 0_Low 0_Idle 1_Close_Zero 2_Close_Zero 0_Low 0_Low
2 2_High 0_Idle 1_Normal False 0_Idle 1_Close_Zero 2_Close_Zero 0_Low 0_Low 0_Idle 1_Close_Zero 2_Close_Zero 0_Low 0_Low
3 2_High 0_Idle 1_Normal False 0_Idle 1_Close_Zero 2_Close_Zero 0_Low 0_Low 0_Idle 1_Close_Zero 2_Close_Zero 0_Low 0_Low
4 2_High 0_Idle 1_Normal False 0_Idle 1_Close_Zero 2_Close_Zero 0_Low 0_Low 0_Idle 1_Close_Zero 2_Close_Zero 0_Low 0_Low
In [21]:
# Apply the 'ordinal' encoding strategy to the discretized frame. Only the
# encoded frame is kept; the second return value is discarded.
encoded_imola_20250114_df, _ = DataEncoder.encode_categorical_columns(
    encoding_strategy='ordinal',
    df=discretized_imola_20250114_df,
)

# Plot the distribution of each discrete variable with respect to the
# 'InverterFault' column of the encoded frame.
DataVisualizer.plot_discrete_distributions(
    fault_col='InverterFault',
    df=encoded_imola_20250114_df,
)
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
No description has been provided for this image
In [22]:
# Destination CSV for the discretized 2025-01-14 Imola dataset
output_file = os.path.join(
    DISCRETIZED_ORIGINAL_DATASETS_IMOLA_FOLDER_NAME,
    'discr-20250114-imola.csv',
)

# Write the discretized DataFrame to disk as CSV, then confirm where it went
DataProcessor.save_dataset(
    discretized_imola_20250114_df,
    output_file,
    file_format="csv",
)
print(f"✅ Saved discretized data to: {output_file}")
Dataset with shape (603724, 14), saved successfully at .\data\discretized-datasets-imola\discr-20250114-imola.csv (csv).
✅ Saved discretized data to: .\data\discretized-datasets-imola\discr-20250114-imola.csv

🍡 Unsupervised discretization with the K-Means method¶

Define the number of bins for each feature

In [30]:
# Number of K-Means bins per feature, grouped by subsystem.
# The groups are merged in the order written, so the resulting key order is:
# battery signals, then rear-left drive signals, then rear-right drive signals.
kmeans_bins = {
    # Battery pack
    **{
        'BatteryVoltage_V': 3,
        'BatteryCurrent_A': 3,
        'BatteryPackTemp_C': 4,
    },
    # Rear-left inverter / motor
    **{
        'InverterSpeed_RearLeft_RPM': 6,
        'Inverter_Iq_Ref_RearLeft_A': 3,
        'Inverter_Id_Ref_RearLeft_A': 6,
        'MotorTemp_RearLeft_C': 4,
        'InverterTemp_RearLeft_C': 7,
    },
    # Rear-right inverter / motor
    **{
        'InverterSpeed_RearRight_RPM': 6,
        'Inverter_Iq_Ref_RearRight_A': 3,
        'Inverter_Id_Ref_RearRight_A': 6,
        'MotorTemp_RearRight_C': 4,
        'InverterTemp_RearRight_C': 7,
    },
}

🔷 Discretizing 20241128-imola dataset with KMeans¶

In [31]:
# Separate the feature columns from the fault flag, which is binarized in its own step
kmeans_features_imola_20241128_df = imola_20241128_df.drop(columns=['InverterFault'])

# Look up every column's bin count by name, so the list handed to the discretizer
# always lines up with the DataFrame's column order instead of relying on
# `kmeans_bins` happening to be written in that same order.
missing_bin_config = [
    col for col in kmeans_features_imola_20241128_df.columns if col not in kmeans_bins
]
if missing_bin_config:
    raise KeyError(f"No K-Means bin count configured for columns: {missing_bin_config}")

# Perform K-Means discretization
kmeans_discretized_imola_20241128_df, kmeans_result = DataDiscretizer.discretize_kmeans(
    df=kmeans_features_imola_20241128_df,
    n_bins=[kmeans_bins[col] for col in kmeans_features_imola_20241128_df.columns]
)

# NOTE(review): in the saved output some features (e.g. Inverter_Id_Ref_RearLeft_A)
# report repeated bin edges (0.0, 0.0, ...) -- verify the edge computation in
# DataDiscretizer.discretize_kmeans.

# Display K-Means discretization results in a readable format
print("\n" + "="*80)
print("🔍 K-Means Discretization Summary")
print("="*80)

for col, details in kmeans_result.items():
    print(f"\n📌 Feature: {col}")
    print("-" * 60)
    print(f"  📊 Method: {details['method']}")
    print(f"  🔢 Number of Bins: {details['bin_count']}")
    print(f"  📍 Bin Edges: {details['bin_edges']}")
    print(f"  🎯 Cluster Centers: {details['cluster_centers']}")
    print(f"  📈 Value Counts: {details['value_counts']}")
    print(f"  🔄 Mapping: {details['mapping']}")
    print("-" * 60)
Processing column: BatteryVoltage_V
BatteryVoltage_V: Using provided n_bins: 3
BatteryVoltage_V discretized into 3 bins.
Processing column: BatteryCurrent_A
BatteryCurrent_A: Using provided n_bins: 3
BatteryCurrent_A discretized into 3 bins.
Processing column: BatteryPackTemp_C
BatteryPackTemp_C: Using provided n_bins: 4
BatteryPackTemp_C discretized into 4 bins.
Processing column: InverterSpeed_RearLeft_RPM
InverterSpeed_RearLeft_RPM: Using provided n_bins: 6
InverterSpeed_RearLeft_RPM discretized into 6 bins.
Processing column: Inverter_Iq_Ref_RearLeft_A
Inverter_Iq_Ref_RearLeft_A: Using provided n_bins: 3
Inverter_Iq_Ref_RearLeft_A discretized into 3 bins.
Processing column: Inverter_Id_Ref_RearLeft_A
Inverter_Id_Ref_RearLeft_A: Using provided n_bins: 6
Inverter_Id_Ref_RearLeft_A discretized into 6 bins.
Processing column: MotorTemp_RearLeft_C
MotorTemp_RearLeft_C: Using provided n_bins: 4
MotorTemp_RearLeft_C discretized into 4 bins.
Processing column: InverterTemp_RearLeft_C
InverterTemp_RearLeft_C: Using provided n_bins: 7
InverterTemp_RearLeft_C discretized into 7 bins.
Processing column: InverterSpeed_RearRight_RPM
InverterSpeed_RearRight_RPM: Using provided n_bins: 6
InverterSpeed_RearRight_RPM discretized into 6 bins.
Processing column: Inverter_Iq_Ref_RearRight_A
Inverter_Iq_Ref_RearRight_A: Using provided n_bins: 3
Inverter_Iq_Ref_RearRight_A discretized into 3 bins.
Processing column: Inverter_Id_Ref_RearRight_A
Inverter_Id_Ref_RearRight_A: Using provided n_bins: 6
Inverter_Id_Ref_RearRight_A discretized into 6 bins.
Processing column: MotorTemp_RearRight_C
MotorTemp_RearRight_C: Using provided n_bins: 4
MotorTemp_RearRight_C discretized into 4 bins.
Processing column: InverterTemp_RearRight_C
InverterTemp_RearRight_C: Using provided n_bins: 7
InverterTemp_RearRight_C discretized into 7 bins.

================================================================================
🔍 K-Means Discretization Summary
================================================================================

📌 Feature: BatteryVoltage_V
------------------------------------------------------------
  📊 Method: K-Means Binning
  🔢 Number of Bins: 3
  📍 Bin Edges: [413.5, 457.6799926757813, 479.6400146484375, 510.7799987792969]
  🎯 Cluster Centers: [450.8841660643744, 493.16844684195354, 470.7280725710292]
  📈 Value Counts: {0: 94204, 1: 77240, 2: 77004}
  🔄 Mapping: {0: (413.5, 457.6799926757813), 1: (457.6799926757813, 479.6400146484375), 2: (479.6400146484375, 510.7799987792969)}
------------------------------------------------------------

📌 Feature: BatteryCurrent_A
------------------------------------------------------------
  📊 Method: K-Means Binning
  🔢 Number of Bins: 3
  📍 Bin Edges: [-36.68999862670898, 0.0599999986588954, 0.6200000047686626, 115.23999786376952]
  🎯 Cluster Centers: [0.5426547254437413, 30.27464127164916, 77.05537592619686]
  📈 Value Counts: {0: 194991, 1: 42069, 2: 11388}
  🔄 Mapping: {0: (-36.68999862670898, 0.0599999986588954), 1: (0.0599999986588954, 0.6200000047686626), 2: (0.6200000047686626, 115.23999786376952)}
------------------------------------------------------------

📌 Feature: BatteryPackTemp_C
------------------------------------------------------------
  📊 Method: K-Means Binning
  🔢 Number of Bins: 4
  📍 Bin Edges: [13.75, 16.75, 21.959999084472656, 27.81999969482422, 29.68000030517578]
  🎯 Cluster Centers: [28.616491927578554, 15.3777839255211, 24.45629642325104, 20.78825607443877]
  📈 Value Counts: {0: 82233, 1: 81874, 2: 36646, 3: 47695}
  🔄 Mapping: {0: (13.75, 16.75), 1: (16.75, 21.959999084472656), 2: (21.959999084472656, 27.81999969482422), 3: (27.81999969482422, 29.68000030517578)}
------------------------------------------------------------

📌 Feature: InverterSpeed_RearLeft_RPM
------------------------------------------------------------
  📊 Method: K-Means Binning
  🔢 Number of Bins: 6
  📍 Bin Edges: [-76.0, -1.0, 0.0, 3.0, 4769.0, 6867.0, 14881.0]
  🎯 Cluster Centers: [20.62962072867913, 6629.352946016759, 2974.976158847773, 10430.721695374417, 4995.9910128912, 8034.712264545499]
  📈 Value Counts: {0: 141459, 1: 36646, 2: 14210, 3: 5106, 4: 27153, 5: 23874}
  🔄 Mapping: {0: (-76.0, -1.0), 1: (-1.0, 0.0), 2: (0.0, 3.0), 3: (3.0, 4769.0), 4: (4769.0, 6867.0), 5: (6867.0, 14881.0)}
------------------------------------------------------------

📌 Feature: Inverter_Iq_Ref_RearLeft_A
------------------------------------------------------------
  📊 Method: K-Means Binning
  🔢 Number of Bins: 3
  📍 Bin Edges: [-26.75, 0.0, 2.8125, 52.0]
  🎯 Cluster Centers: [-0.03620901408857424, 37.62631716450713, 17.729650426414203]
  📈 Value Counts: {0: 178110, 1: 22966, 2: 47372}
  🔄 Mapping: {0: (-26.75, 0.0), 1: (0.0, 2.8125), 2: (2.8125, 52.0)}
------------------------------------------------------------

📌 Feature: Inverter_Id_Ref_RearLeft_A
------------------------------------------------------------
  📊 Method: K-Means Binning
  🔢 Number of Bins: 6
  📍 Bin Edges: [-49.8125, -3.4375, -0.9375, 0.0, 0.0, 0.0, 0.0]
  🎯 Cluster Centers: [-0.10410681716289272, -8.536405968033035, -4.8914872472839095, -35.04382313231325, -20.93918398768281, -2.425968266831328]
  📈 Value Counts: {0: 171611, 1: 7758, 2: 27798, 3: 1111, 4: 1299, 5: 38871}
  🔄 Mapping: {0: (-49.8125, -3.4375), 1: (-3.4375, -0.9375), 2: (-0.9375, 0.0), 3: (0.0, 0.0), 4: (0.0, 0.0), 5: (0.0, 0.0)}
------------------------------------------------------------

📌 Feature: MotorTemp_RearLeft_C
------------------------------------------------------------
  📊 Method: K-Means Binning
  🔢 Number of Bins: 4
  📍 Bin Edges: [12.0, 32.0, 39.0, 49.0, 76.0]
  🎯 Cluster Centers: [33.73333615158966, 63.31082826929664, 13.909595926208517, 45.822453446143655]
  📈 Value Counts: {0: 94621, 1: 41798, 2: 30242, 3: 81787}
  🔄 Mapping: {0: (12.0, 32.0), 1: (32.0, 39.0), 2: (39.0, 49.0), 3: (49.0, 76.0)}
------------------------------------------------------------

📌 Feature: InverterTemp_RearLeft_C
------------------------------------------------------------
  📊 Method: K-Means Binning
  🔢 Number of Bins: 7
  📍 Bin Edges: [15.0, 24.0, 28.0, 29.0, 31.0, 33.0, 34.0, 45.0]
  🎯 Cluster Centers: [28.29360564341743, 33.80628629996717, 16.793900570946256, 24.46665411207726, 41.71512081060014, 30.985814302383712, 37.24841229286772]
  📈 Value Counts: {0: 52734, 1: 55613, 2: 21543, 3: 37171, 4: 7698, 5: 59990, 6: 13699}
  🔄 Mapping: {0: (15.0, 24.0), 1: (24.0, 28.0), 2: (28.0, 29.0), 3: (29.0, 31.0), 4: (31.0, 33.0), 5: (33.0, 34.0), 6: (34.0, 45.0)}
------------------------------------------------------------

📌 Feature: InverterSpeed_RearRight_RPM
------------------------------------------------------------
  📊 Method: K-Means Binning
  🔢 Number of Bins: 6
  📍 Bin Edges: [-11.0, 0.0, 0.0, 0.0, 4794.0, 6693.0, 12655.0]
  🎯 Cluster Centers: [18.35260185393554, 7562.477915137873, 6057.463134942713, 2586.238049940551, 9584.833135938898, 4453.473028507175]
  📈 Value Counts: {0: 141088, 1: 31002, 2: 37179, 3: 8321, 4: 7599, 5: 23259}
  🔄 Mapping: {0: (-11.0, 0.0), 1: (0.0, 0.0), 2: (0.0, 0.0), 3: (0.0, 4794.0), 4: (4794.0, 6693.0), 5: (6693.0, 12655.0)}
------------------------------------------------------------

📌 Feature: Inverter_Iq_Ref_RearRight_A
------------------------------------------------------------
  📊 Method: K-Means Binning
  🔢 Number of Bins: 3
  📍 Bin Edges: [-50.5, -1.5625, 0.0, 32.875]
  🎯 Cluster Centers: [0.3667427478488543, -16.387898260201688, -34.52940134037582]
  📈 Value Counts: {0: 178154, 1: 47017, 2: 23277}
  🔄 Mapping: {0: (-50.5, -1.5625), 1: (-1.5625, 0.0), 2: (0.0, 32.875)}
------------------------------------------------------------

📌 Feature: Inverter_Id_Ref_RearRight_A
------------------------------------------------------------
  📊 Method: K-Means Binning
  🔢 Number of Bins: 6
  📍 Bin Edges: [-45.375, -3.1875, -1.0625, 0.0, 0.0, 0.0, 0.0]
  🎯 Cluster Centers: [-2.194971661268907, -0.08769584661903052, -31.31338304552601, -4.467119527206286, -18.326174242424344, -7.674653057119889]
  📈 Value Counts: {0: 41992, 1: 167478, 2: 1274, 3: 28596, 4: 1650, 5: 7458}
  🔄 Mapping: {0: (-45.375, -3.1875), 1: (-3.1875, -1.0625), 2: (-1.0625, 0.0), 3: (0.0, 0.0), 4: (0.0, 0.0), 5: (0.0, 0.0)}
------------------------------------------------------------

📌 Feature: MotorTemp_RearRight_C
------------------------------------------------------------
  📊 Method: K-Means Binning
  🔢 Number of Bins: 4
  📍 Bin Edges: [13.0, 34.0, 42.0, 54.0, 80.0]
  🎯 Cluster Centers: [50.81639795517266, 36.31441086634455, 68.33492622608333, 14.891567480341838]
  📈 Value Counts: {0: 76290, 1: 101819, 2: 40597, 3: 29742}
  🔄 Mapping: {0: (13.0, 34.0), 1: (34.0, 42.0), 2: (42.0, 54.0), 3: (54.0, 80.0)}
------------------------------------------------------------

📌 Feature: InverterTemp_RearRight_C
------------------------------------------------------------
  📊 Method: K-Means Binning
  🔢 Number of Bins: 7
  📍 Bin Edges: [15.0, 24.0, 28.0, 29.0, 31.0, 33.0, 34.0, 44.0]
  🎯 Cluster Centers: [40.15338860850781, 30.95595754944509, 24.60902791639487, 15.345998848588128, 34.208817829458326, 28.27070743333499, 19.113807704225827]
  📈 Value Counts: {0: 11096, 1: 62190, 2: 35069, 3: 12159, 4: 68112, 5: 50139, 6: 9683}
  🔄 Mapping: {0: (15.0, 24.0), 1: (24.0, 28.0), 2: (28.0, 29.0), 3: (29.0, 31.0), 4: (31.0, 33.0), 5: (33.0, 34.0), 6: (34.0, 44.0)}
------------------------------------------------------------
In [33]:
# Binarize the 'InverterFault' column around a 0.5 threshold, using the labels [0, 1]
binarization_info, binarized_fault_df = DataDiscretizer.binarize_columns(
    df=imola_20241128_df['InverterFault'].to_frame(),
    columns=['InverterFault'],
    thresholds={'InverterFault': 0.5},
    labels={'InverterFault': [0, 1]},
)

# Attach the binarized fault column to the K-Means discretized dataframe
kmeans_discretized_imola_20241128_df['InverterFault'] = binarized_fault_df['InverterFault']

# Report threshold, share of positives and value counts for each binarized column
print("\n📊 Binarization Overview:")
for column, info in binarization_info.items():
    print(
        f"\n🔹 Column: {column}",
        f"   - Threshold: {info['threshold']}",
        f"   - True (%): {info['percentage_true']}",
        "   - Value Counts:",
        info['value_counts'].to_string(),
        sep="\n",
    )
📊 Binarization Overview:

🔹 Column: InverterFault
   - Threshold: 0.5
   - True (%): 0.0
   - Value Counts:
InverterFault
0    248448
In [34]:
# Encode categorical columns using the ordinal strategy; the second return
# value is not used in this notebook section
encoded_imola_20241128_df, _ = DataEncoder.encode_categorical_columns(
    df=kmeans_discretized_imola_20241128_df,
    encoding_strategy='ordinal'
)

# Visualize the distribution of discrete variables with respect to 'InverterFault'.
# The plotting call emits a long run of identical INFO messages from
# `matplotlib.category` ("Using categorical units to plot a list of strings ...")
# that bury the figure, so INFO-and-below logging is muted for the duration of
# the call only.
logging.disable(logging.INFO)
try:
    DataVisualizer.plot_discrete_distributions(
        df=encoded_imola_20241128_df,
        fault_col='InverterFault'
    )
finally:
    # Re-enable logging even if plotting raises
    logging.disable(logging.NOTSET)
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
No description has been provided for this image
In [35]:
# Destination of the KMeans-discretized 20241128 dataset (inside the 'kmeans' subfolder)
output_file = os.path.join(
    DISCRETIZED_ORIGINAL_DATASETS_IMOLA_FOLDER_NAME,
    'kmeans',
    'discr-20241128-imola.csv',
)

# Write the discretized frame as CSV through the project helper, then echo the target path
DataProcessor.save_dataset(
    kmeans_discretized_imola_20241128_df,
    output_file,
    file_format="csv",
)
print(f"✅ Saved discretized (KMeans) data to: {output_file}")
Dataset with shape (248448, 14), saved successfully at .\data\discretized-datasets-imola\kmeans\discr-20241128-imola.csv (csv).
✅ Saved discretized (KMeans) data to: .\data\discretized-datasets-imola\kmeans\discr-20241128-imola.csv

🔷 Discretizing 20250113-imola dataset with KMeans¶

In [36]:
# Discretize every feature of the 20250113 dataset with K-Means; the fault label is
# excluded here and handled separately.
# NOTE(review): the bin counts are passed positionally, so the order of
# `kmeans_bins` must match the column order of the frame — confirm against the cell
# that builds `kmeans_bins`.
kmeans_discretized_imola_20250113_df, kmeans_result = DataDiscretizer.discretize_kmeans(
    df=imola_20250113_df.drop(columns=['InverterFault']),
    n_bins=[*kmeans_bins.values()],
)

# Header of the human-readable report
summary_banner = "=" * 80
print(f"\n{summary_banner}\n🔍 K-Means Discretization Summary\n{summary_banner}")

# (printed label, key in the per-feature result dict), in display order
kmeans_summary_fields = (
    ("📊 Method", 'method'),
    ("🔢 Number of Bins", 'bin_count'),
    ("📍 Bin Edges", 'bin_edges'),
    ("🎯 Cluster Centers", 'cluster_centers'),
    ("📈 Value Counts", 'value_counts'),
    ("🔄 Mapping", 'mapping'),
)
feature_rule = "-" * 60

# One block per feature, framed by dashed rules.
# NOTE(review): the recorded output shows repeated bin edges for some features
# (e.g. several consecutive 0.0 edges) — verify how DataDiscretizer derives edges.
for col, details in kmeans_result.items():
    print(f"\n📌 Feature: {col}")
    print(feature_rule)
    for field_label, field_key in kmeans_summary_fields:
        print(f"  {field_label}: {details[field_key]}")
    print(feature_rule)
Processing column: BatteryVoltage_V
BatteryVoltage_V: Using provided n_bins: 3
BatteryVoltage_V discretized into 3 bins.
Processing column: BatteryCurrent_A
BatteryCurrent_A: Using provided n_bins: 3
BatteryCurrent_A discretized into 3 bins.
Processing column: BatteryPackTemp_C
BatteryPackTemp_C: Using provided n_bins: 4
BatteryPackTemp_C discretized into 4 bins.
Processing column: InverterSpeed_RearLeft_RPM
InverterSpeed_RearLeft_RPM: Using provided n_bins: 6
InverterSpeed_RearLeft_RPM discretized into 6 bins.
Processing column: Inverter_Iq_Ref_RearLeft_A
Inverter_Iq_Ref_RearLeft_A: Using provided n_bins: 3
Inverter_Iq_Ref_RearLeft_A discretized into 3 bins.
Processing column: Inverter_Id_Ref_RearLeft_A
Inverter_Id_Ref_RearLeft_A: Using provided n_bins: 6
Inverter_Id_Ref_RearLeft_A discretized into 6 bins.
Processing column: MotorTemp_RearLeft_C
MotorTemp_RearLeft_C: Using provided n_bins: 4
MotorTemp_RearLeft_C discretized into 4 bins.
Processing column: InverterTemp_RearLeft_C
InverterTemp_RearLeft_C: Using provided n_bins: 7
InverterTemp_RearLeft_C discretized into 7 bins.
Processing column: InverterSpeed_RearRight_RPM
InverterSpeed_RearRight_RPM: Using provided n_bins: 6
InverterSpeed_RearRight_RPM discretized into 6 bins.
Processing column: Inverter_Iq_Ref_RearRight_A
Inverter_Iq_Ref_RearRight_A: Using provided n_bins: 3
Inverter_Iq_Ref_RearRight_A discretized into 3 bins.
Processing column: Inverter_Id_Ref_RearRight_A
Inverter_Id_Ref_RearRight_A: Using provided n_bins: 6
Inverter_Id_Ref_RearRight_A discretized into 6 bins.
Processing column: MotorTemp_RearRight_C
MotorTemp_RearRight_C: Using provided n_bins: 4
MotorTemp_RearRight_C discretized into 4 bins.
Processing column: InverterTemp_RearRight_C
InverterTemp_RearRight_C: Using provided n_bins: 7
InverterTemp_RearRight_C discretized into 7 bins.

================================================================================
🔍 K-Means Discretization Summary
================================================================================

📌 Feature: BatteryVoltage_V
------------------------------------------------------------
  📊 Method: K-Means Binning
  🔢 Number of Bins: 3
  📍 Bin Edges: [385.8200073242188, 453.8200073242188, 473.8800048828125, 503.1000061035156]
  🎯 Cluster Centers: [463.57189172733916, 486.486566417157, 438.74000215180104]
  📈 Value Counts: {0: 191299, 1: 158915, 2: 147339}
  🔄 Mapping: {0: (385.8200073242188, 453.8200073242188), 1: (453.8200073242188, 473.8800048828125), 2: (473.8800048828125, 503.1000061035156)}
------------------------------------------------------------

📌 Feature: BatteryCurrent_A
------------------------------------------------------------
  📊 Method: K-Means Binning
  🔢 Number of Bins: 3
  📍 Bin Edges: [-13.5, 0.0299999993294477, 1.090000033378601, 115.79000091552734]
  🎯 Cluster Centers: [1.1704989269415709, 25.64220777929068, 68.675531492679]
  📈 Value Counts: {0: 426140, 1: 55553, 2: 15860}
  🔄 Mapping: {0: (-13.5, 0.0299999993294477), 1: (0.0299999993294477, 1.090000033378601), 2: (1.090000033378601, 115.79000091552734)}
------------------------------------------------------------

📌 Feature: BatteryPackTemp_C
------------------------------------------------------------
  📊 Method: K-Means Binning
  🔢 Number of Bins: 4
  📍 Bin Edges: [5.190000057220459, 11.489999771118164, 15.729999542236328, 19.270000457763672, 23.63999938964844]
  🎯 Cluster Centers: [16.574878316087663, 7.251553135798417, 12.646073472343842, 20.727677360132574]
  📈 Value Counts: {0: 155692, 1: 90482, 2: 118091, 3: 133288}
  🔄 Mapping: {0: (5.190000057220459, 11.489999771118164), 1: (11.489999771118164, 15.729999542236328), 2: (15.729999542236328, 19.270000457763672), 3: (19.270000457763672, 23.63999938964844)}
------------------------------------------------------------

📌 Feature: InverterSpeed_RearLeft_RPM
------------------------------------------------------------
  📊 Method: K-Means Binning
  🔢 Number of Bins: 6
  📍 Bin Edges: [-1439.0, -1.0, 1.0, 1748.0, 3440.0, 4773.0, 14967.0]
  🎯 Cluster Centers: [6145.221053681564, 23.614864468717315, 3502.5129763055565, 2254.090649862672, 8847.434798270768, 4735.365005369279]
  📈 Value Counts: {0: 39808, 1: 239874, 2: 78946, 3: 56930, 4: 8245, 5: 73750}
  🔄 Mapping: {0: (-1439.0, -1.0), 1: (-1.0, 1.0), 2: (1.0, 1748.0), 3: (1748.0, 3440.0), 4: (3440.0, 4773.0), 5: (4773.0, 14967.0)}
------------------------------------------------------------

📌 Feature: Inverter_Iq_Ref_RearLeft_A
------------------------------------------------------------
  📊 Method: K-Means Binning
  🔢 Number of Bins: 3
  📍 Bin Edges: [-7.0625, 0.0, 2.4375, 52.0]
  🎯 Cluster Centers: [0.660740561115186, 16.28424835630546, 37.76631710856252]
  📈 Value Counts: {0: 378885, 1: 80763, 2: 37905}
  🔄 Mapping: {0: (-7.0625, 0.0), 1: (0.0, 2.4375), 2: (2.4375, 52.0)}
------------------------------------------------------------

📌 Feature: Inverter_Id_Ref_RearLeft_A
------------------------------------------------------------
  📊 Method: K-Means Binning
  🔢 Number of Bins: 6
  📍 Bin Edges: [-52.9375, -2.5625, -0.5, 0.0, 0.0, 0.0, 0.0]
  🎯 Cluster Centers: [-0.08436387709837745, -4.577186134785373, -19.766811389337715, -2.091052557345818, -7.734198063287585, -33.64936440677975]
  📈 Value Counts: {0: 362147, 1: 41889, 2: 1238, 3: 72673, 4: 18898, 5: 708}
  🔄 Mapping: {0: (-52.9375, -2.5625), 1: (-2.5625, -0.5), 2: (-0.5, 0.0), 3: (0.0, 0.0), 4: (0.0, 0.0), 5: (0.0, 0.0)}
------------------------------------------------------------

📌 Feature: MotorTemp_RearLeft_C
------------------------------------------------------------
  📊 Method: K-Means Binning
  🔢 Number of Bins: 4
  📍 Bin Edges: [7.0, 29.0, 33.0, 42.0, 57.0]
  🎯 Cluster Centers: [37.20963659214347, 10.808418868734584, 46.09639515312672, 29.257080727488095]
  📈 Value Counts: {0: 112986, 1: 36632, 2: 125359, 3: 222576}
  🔄 Mapping: {0: (7.0, 29.0), 1: (29.0, 33.0), 2: (33.0, 42.0), 3: (42.0, 57.0)}
------------------------------------------------------------

📌 Feature: InverterTemp_RearLeft_C
------------------------------------------------------------
  📊 Method: K-Means Binning
  🔢 Number of Bins: 7
  📍 Bin Edges: [0.0, 22.0, 24.0, 25.0, 25.0, 28.0, 32.0, 45.0]
  🎯 Cluster Centers: [24.754142788583945, 37.25794094174108, 11.042696494079383, 31.75548726953521, 22.37958086758254, 27.918712756767647, 19.087277811067025]
  📈 Value Counts: {0: 212538, 1: 37590, 2: 16629, 3: 63784, 4: 73676, 5: 68301, 6: 25035}
  🔄 Mapping: {0: (0.0, 22.0), 1: (22.0, 24.0), 2: (24.0, 25.0), 3: (25.0, 25.0), 4: (25.0, 28.0), 5: (28.0, 32.0), 6: (32.0, 45.0)}
------------------------------------------------------------

📌 Feature: InverterSpeed_RearRight_RPM
------------------------------------------------------------
  📊 Method: K-Means Binning
  🔢 Number of Bins: 6
  📍 Bin Edges: [-573.0, 0.0, 0.0, 1740.0, 3412.0, 4670.0, 10113.0]
  🎯 Cluster Centers: [3404.440648373218, 24.222514019922983, 5969.147318248084, 2178.6841410450065, 4613.262369869755, 8172.621650705116]
  📈 Value Counts: {0: 80440, 1: 239192, 2: 40866, 3: 52637, 4: 75064, 5: 9354}
  🔄 Mapping: {0: (-573.0, 0.0), 1: (0.0, 0.0), 2: (0.0, 1740.0), 3: (1740.0, 3412.0), 4: (3412.0, 4670.0), 5: (4670.0, 10113.0)}
------------------------------------------------------------

📌 Feature: Inverter_Iq_Ref_RearRight_A
------------------------------------------------------------
  📊 Method: K-Means Binning
  🔢 Number of Bins: 3
  📍 Bin Edges: [-50.5, -2.5, 0.0, 6.875]
  🎯 Cluster Centers: [-0.6219269492611454, -36.66034967066401, -15.67274023674117]
  📈 Value Counts: {0: 377231, 1: 38715, 2: 81607}
  🔄 Mapping: {0: (-50.5, -2.5), 1: (-2.5, 0.0), 2: (0.0, 6.875)}
------------------------------------------------------------

📌 Feature: Inverter_Id_Ref_RearRight_A
------------------------------------------------------------
  📊 Method: K-Means Binning
  🔢 Number of Bins: 6
  📍 Bin Edges: [-45.9375, -2.5, -0.4375, 0.0, 0.0, 0.0, 0.0]
  🎯 Cluster Centers: [-0.07881202377394536, -7.499628380124779, -20.086969111969122, -2.0071903333557284, -34.17778649921508, -4.423834872734697]
  📈 Value Counts: {0: 360386, 1: 19341, 2: 1295, 3: 73345, 4: 637, 5: 42549}
  🔄 Mapping: {0: (-45.9375, -2.5), 1: (-2.5, -0.4375), 2: (-0.4375, 0.0), 3: (0.0, 0.0), 4: (0.0, 0.0), 5: (0.0, 0.0)}
------------------------------------------------------------

📌 Feature: MotorTemp_RearRight_C
------------------------------------------------------------
  📊 Method: K-Means Binning
  🔢 Number of Bins: 4
  📍 Bin Edges: [9.0, 30.0, 36.0, 47.0, 67.0]
  🎯 Cluster Centers: [40.664866271368155, 12.336165048540362, 52.94671602711386, 30.455701838373397]
  📈 Value Counts: {0: 126824, 1: 35432, 2: 126417, 3: 208880}
  🔄 Mapping: {0: (9.0, 30.0), 1: (30.0, 36.0), 2: (36.0, 47.0), 3: (47.0, 67.0)}
------------------------------------------------------------

📌 Feature: InverterTemp_RearRight_C
------------------------------------------------------------
  📊 Method: K-Means Binning
  🔢 Number of Bins: 7
  📍 Bin Edges: [0.0, 23.0, 24.0, 25.0, 27.0, 28.0, 32.0, 46.0]
  🎯 Cluster Centers: [33.33836957363041, 19.69597027058333, 25.8265265627967, 11.857350432747063, 23.40151820011179, 37.765118809449774, 29.168481139930567]
  📈 Value Counts: {0: 51692, 1: 25833, 2: 173871, 3: 15829, 4: 109340, 5: 28491, 6: 92497}
  🔄 Mapping: {0: (0.0, 23.0), 1: (23.0, 24.0), 2: (24.0, 25.0), 3: (25.0, 27.0), 4: (27.0, 28.0), 5: (28.0, 32.0), 6: (32.0, 46.0)}
------------------------------------------------------------
In [37]:
# Binarize the fault signal: a single 0.5 threshold with output labels 0 and 1
fault_column = 'InverterFault'
binarization_info, binarized_fault_df = DataDiscretizer.binarize_columns(
    df=imola_20250113_df[fault_column].to_frame(),
    columns=[fault_column],
    thresholds={fault_column: 0.5},
    labels={fault_column: [0, 1]},
)

# Attach the binarized fault column to the K-Means discretized dataframe
kmeans_discretized_imola_20250113_df[fault_column] = binarized_fault_df[fault_column]

# Report, per binarized column, the threshold, the percentage of True values and the counts
print("\n📊 Binarization Overview:")
for column, info in binarization_info.items():
    print(f"\n🔹 Column: {column}")
    for caption, info_key in (("Threshold", 'threshold'), ("True (%)", 'percentage_true')):
        print(f"   - {caption}: {info[info_key]}")
    print("   - Value Counts:")
    print(info['value_counts'].to_string())
📊 Binarization Overview:

🔹 Column: InverterFault
   - Threshold: 0.5
   - True (%): 11.620872550260977
   - Value Counts:
InverterFault
0    439733
1     57820
In [38]:
# Ordinal-encode the discretized 20250113 dataframe; the helper's second return value
# is not needed here and is discarded.
encoding_output = DataEncoder.encode_categorical_columns(
    df=kmeans_discretized_imola_20250113_df,
    encoding_strategy='ordinal',
)
encoded_imola_20250113_df, _ = encoding_output

# Plot each discrete variable's distribution against the 'InverterFault' column.
# NOTE(review): the recorded output is flooded with matplotlib.category INFO messages
# about strings parsable as numbers — presumably the encoded categories reach the
# plotter as strings; confirm inside DataVisualizer.
DataVisualizer.plot_discrete_distributions(
    fault_col='InverterFault',
    df=encoded_imola_20250113_df,
)
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
No description has been provided for this image
In [39]:
# Destination: the 'kmeans' sub-folder of the discretized original datasets directory
kmeans_output_dir = os.path.join(DISCRETIZED_ORIGINAL_DATASETS_IMOLA_FOLDER_NAME, 'kmeans')
output_file = os.path.join(kmeans_output_dir, 'discr-20250113-imola.csv')

# Write the K-Means discretized 20250113 frame as CSV, then echo the path that was used
DataProcessor.save_dataset(
    kmeans_discretized_imola_20250113_df,
    output_file,
    file_format="csv",
)
print(f"✅ Saved discretized (KMeans) data to: {output_file}")
Dataset with shape (497553, 14), saved successfully at .\data\discretized-datasets-imola\kmeans\discr-20250113-imola.csv (csv).
✅ Saved discretized (KMeans) data to: .\data\discretized-datasets-imola\kmeans\discr-20250113-imola.csv

🔷 Discretizing 20250114-imola dataset with KMeans¶

In [40]:
# Bin counts are handed over as a plain list, one entry per feature column.
# NOTE(review): this presumably relies on kmeans_bins' insertion order matching the
# column order of the frame passed below — verify where kmeans_bins is defined.
n_bins_per_column = list(kmeans_bins.values())

# Run K-Means discretization on every column except the fault label
kmeans_discretized_imola_20250114_df, kmeans_result = DataDiscretizer.discretize_kmeans(
    df=imola_20250114_df.drop(columns=['InverterFault']),
    n_bins=n_bins_per_column
)

# (label, key) pairs printed for each feature, in display order
summary_fields = (
    ("📊 Method", 'method'),
    ("🔢 Number of Bins", 'bin_count'),
    ("📍 Bin Edges", 'bin_edges'),
    ("🎯 Cluster Centers", 'cluster_centers'),
    ("📈 Value Counts", 'value_counts'),
    ("🔄 Mapping", 'mapping'),
)

# Report header
summary_rule = "=" * 80
print("\n" + summary_rule)
print("🔍 K-Means Discretization Summary")
print(summary_rule)

# One section per feature.
# NOTE(review): the recorded output for this cell shows repeated bin edges for some
# features (e.g. several 0.0 edges for Inverter_Id_Ref_*), so the printed mapping can
# contain zero-width intervals — confirm how discretize_kmeans derives its edges.
for col, details in kmeans_result.items():
    print(f"\n📌 Feature: {col}")
    print("-" * 60)
    for field_label, field_key in summary_fields:
        print(f"  {field_label}: {details[field_key]}")
    print("-" * 60)
Processing column: BatteryVoltage_V
BatteryVoltage_V: Using provided n_bins: 3
BatteryVoltage_V discretized into 3 bins.
Processing column: BatteryCurrent_A
BatteryCurrent_A: Using provided n_bins: 3
BatteryCurrent_A discretized into 3 bins.
Processing column: BatteryPackTemp_C
BatteryPackTemp_C: Using provided n_bins: 4
BatteryPackTemp_C discretized into 4 bins.
Processing column: InverterSpeed_RearLeft_RPM
InverterSpeed_RearLeft_RPM: Using provided n_bins: 6
InverterSpeed_RearLeft_RPM discretized into 6 bins.
Processing column: Inverter_Iq_Ref_RearLeft_A
Inverter_Iq_Ref_RearLeft_A: Using provided n_bins: 3
Inverter_Iq_Ref_RearLeft_A discretized into 3 bins.
Processing column: Inverter_Id_Ref_RearLeft_A
Inverter_Id_Ref_RearLeft_A: Using provided n_bins: 6
Inverter_Id_Ref_RearLeft_A discretized into 6 bins.
Processing column: MotorTemp_RearLeft_C
MotorTemp_RearLeft_C: Using provided n_bins: 4
MotorTemp_RearLeft_C discretized into 4 bins.
Processing column: InverterTemp_RearLeft_C
InverterTemp_RearLeft_C: Using provided n_bins: 7
InverterTemp_RearLeft_C discretized into 7 bins.
Processing column: InverterSpeed_RearRight_RPM
InverterSpeed_RearRight_RPM: Using provided n_bins: 6
InverterSpeed_RearRight_RPM discretized into 6 bins.
Processing column: Inverter_Iq_Ref_RearRight_A
Inverter_Iq_Ref_RearRight_A: Using provided n_bins: 3
Inverter_Iq_Ref_RearRight_A discretized into 3 bins.
Processing column: Inverter_Id_Ref_RearRight_A
Inverter_Id_Ref_RearRight_A: Using provided n_bins: 6
Inverter_Id_Ref_RearRight_A discretized into 6 bins.
Processing column: MotorTemp_RearRight_C
MotorTemp_RearRight_C: Using provided n_bins: 4
MotorTemp_RearRight_C discretized into 4 bins.
Processing column: InverterTemp_RearRight_C
InverterTemp_RearRight_C: Using provided n_bins: 7
InverterTemp_RearRight_C discretized into 7 bins.

================================================================================
🔍 K-Means Discretization Summary
================================================================================

📌 Feature: BatteryVoltage_V
------------------------------------------------------------
  📊 Method: K-Means Binning
  🔢 Number of Bins: 3
  📍 Bin Edges: [405.3999938964844, 465.5, 485.260009765625, 503.260009765625]
  🎯 Cluster Centers: [463.01402500742046, 487.8670697842577, 443.71776983770343]
  📈 Value Counts: {0: 205420, 1: 342734, 2: 55570}
  🔄 Mapping: {0: (405.3999938964844, 465.5), 1: (465.5, 485.260009765625), 2: (485.260009765625, 503.260009765625)}
------------------------------------------------------------

📌 Feature: BatteryCurrent_A
------------------------------------------------------------
  📊 Method: K-Means Binning
  🔢 Number of Bins: 3
  📍 Bin Edges: [-14.93000030517578, 0.0299999993294477, 0.0900000035762786, 116.33999633789062]
  🎯 Cluster Centers: [0.6347228325262977, 70.11582684037796, 26.173025176113846]
  📈 Value Counts: {0: 522464, 1: 17234, 2: 64026}
  🔄 Mapping: {0: (-14.93000030517578, 0.0299999993294477), 1: (0.0299999993294477, 0.0900000035762786), 2: (0.0900000035762786, 116.33999633789062)}
------------------------------------------------------------

📌 Feature: BatteryPackTemp_C
------------------------------------------------------------
  📊 Method: K-Means Binning
  🔢 Number of Bins: 4
  📍 Bin Edges: [15.100000381469728, 18.950000762939453, 28.540000915527344, 30.729999542236328, 33.150001525878906]
  🎯 Cluster Centers: [16.650820800408827, 31.10757045999683, 28.16873758727166, 18.710720547811018]
  📈 Value Counts: {0: 109826, 1: 212408, 2: 221648, 3: 59842}
  🔄 Mapping: {0: (15.100000381469728, 18.950000762939453), 1: (18.950000762939453, 28.540000915527344), 2: (28.540000915527344, 30.729999542236328), 3: (30.729999542236328, 33.150001525878906)}
------------------------------------------------------------

📌 Feature: InverterSpeed_RearLeft_RPM
------------------------------------------------------------
  📊 Method: K-Means Binning
  🔢 Number of Bins: 6
  📍 Bin Edges: [-326.0, -1.0, 0.0, 3.0, 2851.0, 4808.0, 15312.0]
  🎯 Cluster Centers: [29.16145619705503, 4985.88094842793, 2209.337123656471, 9058.826691290482, 3660.076954563566, 6397.409932015105]
  📈 Value Counts: {0: 360934, 1: 72128, 2: 44864, 3: 10359, 4: 70790, 5: 44649}
  🔄 Mapping: {0: (-326.0, -1.0), 1: (-1.0, 0.0), 2: (0.0, 3.0), 3: (3.0, 2851.0), 4: (2851.0, 4808.0), 5: (4808.0, 15312.0)}
------------------------------------------------------------

📌 Feature: Inverter_Iq_Ref_RearLeft_A
------------------------------------------------------------
  📊 Method: K-Means Binning
  🔢 Number of Bins: 3
  📍 Bin Edges: [-7.625, 0.0, 0.0, 51.875]
  🎯 Cluster Centers: [0.4015144414907743, 39.82179805002807, 19.093881074086724]
  📈 Value Counts: {0: 493474, 1: 36103, 2: 74147}
  🔄 Mapping: {0: (-7.625, 0.0), 1: (0.0, 0.0), 2: (0.0, 51.875)}
------------------------------------------------------------

📌 Feature: Inverter_Id_Ref_RearLeft_A
------------------------------------------------------------
  📊 Method: K-Means Binning
  🔢 Number of Bins: 6
  📍 Bin Edges: [-52.9375, -2.0625, 0.0, 0.0, 0.0, 0.0, 0.0]
  🎯 Cluster Centers: [-0.05359657265552431, -7.8063245315163, -19.932142857142928, -2.2288847354971297, -4.615025932316691, -32.822265624999964]
  📈 Value Counts: {0: 479380, 1: 21719, 2: 980, 3: 52835, 4: 48106, 5: 704}
  🔄 Mapping: {0: (-52.9375, -2.0625), 1: (-2.0625, 0.0), 2: (0.0, 0.0), 3: (0.0, 0.0), 4: (0.0, 0.0), 5: (0.0, 0.0)}
------------------------------------------------------------

📌 Feature: MotorTemp_RearLeft_C
------------------------------------------------------------
  📊 Method: K-Means Binning
  🔢 Number of Bins: 4
  📍 Bin Edges: [4.0, 27.0, 31.0, 38.0, 66.0]
  🎯 Cluster Centers: [12.245433175298668, 29.69924260576422, 41.070378301215456, 57.90890176371201]
  📈 Value Counts: {0: 76147, 1: 342226, 2: 154110, 3: 31241}
  🔄 Mapping: {0: (4.0, 27.0), 1: (27.0, 31.0), 2: (31.0, 38.0), 3: (38.0, 66.0)}
------------------------------------------------------------

📌 Feature: InverterTemp_RearLeft_C
------------------------------------------------------------
  📊 Method: K-Means Binning
  🔢 Number of Bins: 7
  📍 Bin Edges: [0.0, 22.0, 24.0, 25.0, 27.0, 29.0, 33.0, 45.0]
  🎯 Cluster Centers: [17.068451324289217, 27.91318319685784, 22.124766433674626, 35.749308681884244, 10.768779002317565, 24.868292006721873, 31.239327123657674]
  📈 Value Counts: {0: 28166, 1: 116383, 2: 101149, 3: 74857, 4: 27584, 5: 173133, 6: 82452}
  🔄 Mapping: {0: (0.0, 22.0), 1: (22.0, 24.0), 2: (24.0, 25.0), 3: (25.0, 27.0), 4: (27.0, 29.0), 5: (29.0, 33.0), 6: (33.0, 45.0)}
------------------------------------------------------------

📌 Feature: InverterSpeed_RearRight_RPM
------------------------------------------------------------
  📊 Method: K-Means Binning
  🔢 Number of Bins: 6
  📍 Bin Edges: [-239.0, 0.0, 0.0, 0.0, 2868.0, 4727.0, 11698.0]
  🎯 Cluster Centers: [30.720205704699083, 4819.449302477875, 3531.1125359310913, 8502.343852388374, 6177.489420483198, 2079.717369619197]
  📈 Value Counts: {0: 360872, 1: 73998, 2: 71836, 3: 11483, 4: 46465, 5: 39070}
  🔄 Mapping: {0: (-239.0, 0.0), 1: (0.0, 0.0), 2: (0.0, 0.0), 3: (0.0, 2868.0), 4: (2868.0, 4727.0), 5: (4727.0, 11698.0)}
------------------------------------------------------------

📌 Feature: Inverter_Iq_Ref_RearRight_A
------------------------------------------------------------
  📊 Method: K-Means Binning
  🔢 Number of Bins: 3
  📍 Bin Edges: [-50.375, 0.0, 0.0, 7.375]
  🎯 Cluster Centers: [-0.4094093342567513, -39.35148427334678, -18.911576733068237]
  📈 Value Counts: {0: 493676, 1: 35354, 2: 74694}
  🔄 Mapping: {0: (-50.375, 0.0), 1: (0.0, 0.0), 2: (0.0, 7.375)}
------------------------------------------------------------

📌 Feature: Inverter_Id_Ref_RearRight_A
------------------------------------------------------------
  📊 Method: K-Means Binning
  🔢 Number of Bins: 6
  📍 Bin Edges: [-46.125, -2.0625, 0.0, 0.0, 0.0, 0.0, 0.0]
  🎯 Cluster Centers: [-4.588396467617704, -0.054102910237402435, -7.724731182795729, -32.82863729508191, -2.2362862332032822, -18.052535377358538]
  📈 Value Counts: {0: 46739, 1: 479102, 2: 20925, 3: 976, 4: 54922, 5: 1060}
  🔄 Mapping: {0: (-46.125, -2.0625), 1: (-2.0625, 0.0), 2: (0.0, 0.0), 3: (0.0, 0.0), 4: (0.0, 0.0), 5: (0.0, 0.0)}
------------------------------------------------------------

📌 Feature: MotorTemp_RearRight_C
------------------------------------------------------------
  📊 Method: K-Means Binning
  🔢 Number of Bins: 4
  📍 Bin Edges: [4.0, 29.0, 34.0, 42.0, 77.0]
  🎯 Cluster Centers: [31.552076309952025, 45.704185796906636, 13.3894029183318, 67.0992184107673]
  📈 Value Counts: {0: 332224, 1: 167710, 2: 71548, 3: 32242}
  🔄 Mapping: {0: (4.0, 29.0), 1: (29.0, 34.0), 2: (34.0, 42.0), 3: (42.0, 77.0)}
------------------------------------------------------------

📌 Feature: InverterTemp_RearRight_C
------------------------------------------------------------
  📊 Method: K-Means Binning
  🔢 Number of Bins: 7
  📍 Bin Edges: [0.0, 22.0, 24.0, 25.0, 28.0, 30.0, 34.0, 46.0]
  🎯 Cluster Centers: [21.877696067917938, 36.47059690994887, 28.14853160375501, 13.040176704302757, 31.41631274235912, 24.92583922101859, 8.526512829121202e-14]
  📈 Value Counts: {0: 93284, 1: 88154, 2: 124115, 3: 38256, 4: 92063, 5: 166166, 6: 1686}
  🔄 Mapping: {0: (0.0, 22.0), 1: (22.0, 24.0), 2: (24.0, 25.0), 3: (25.0, 28.0), 4: (28.0, 30.0), 5: (30.0, 34.0), 6: (34.0, 46.0)}
------------------------------------------------------------
In [43]:
# Name of the fault column that is binarized separately from the K-Means features
fault_col = 'InverterFault'

# Binarize the fault column on its own single-column frame, using a 0.5 threshold
# and the labels 0 / 1 (which side of the threshold maps to 1 is decided inside
# DataDiscretizer.binarize_columns — TODO confirm)
binarization_info, binarized_fault_df = DataDiscretizer.binarize_columns(
    df=imola_20250114_df[fault_col].to_frame(),
    columns=[fault_col],
    thresholds={fault_col: 0.5},
    labels={fault_col: [0, 1]}
)

# Attach the binarized fault column to the K-Means discretized dataframe
kmeans_discretized_imola_20250114_df[fault_col] = binarized_fault_df[fault_col]

# Print threshold, share of positives and class counts for each binarized column
print("\n📊 Binarization Overview:")
for column, info in binarization_info.items():
    print(f"\n🔹 Column: {column}")
    print(f"   - Threshold: {info['threshold']}")
    print(f"   - True (%): {info['percentage_true']}")
    print("   - Value Counts:")
    value_counts_text = info['value_counts'].to_string()
    print(value_counts_text)
📊 Binarization Overview:

🔹 Column: InverterFault
   - Threshold: 0.5
   - True (%): 12.56931975538491
   - Value Counts:
InverterFault
0    527840
1     75884
In [46]:
# Ordinal-encode the discretized frame; the second returned value is not needed here
encoded_imola_20250114_df, _ = DataEncoder.encode_categorical_columns(
    df=kmeans_discretized_imola_20250114_df, encoding_strategy='ordinal'
)

# Plot each discrete variable's distribution against the 'InverterFault' column
DataVisualizer.plot_discrete_distributions(df=encoded_imola_20250114_df, fault_col='InverterFault')
No description has been provided for this image
In [47]:
# Destination: the 'kmeans' sub-folder of the discretized original datasets directory
kmeans_output_dir = os.path.join(DISCRETIZED_ORIGINAL_DATASETS_IMOLA_FOLDER_NAME, 'kmeans')
output_file = os.path.join(kmeans_output_dir, 'discr-20250114-imola.csv')

# Write the K-Means discretized 20250114 frame (fault column included) as CSV,
# then echo the path that was used
DataProcessor.save_dataset(
    kmeans_discretized_imola_20250114_df,
    output_file,
    file_format="csv",
)
print(f"✅ Saved discretized (KMeans) data to: {output_file}")
Dataset with shape (603724, 14), saved successfully at .\data\discretized-datasets-imola\kmeans\discr-20250114-imola.csv (csv).
✅ Saved discretized (KMeans) data to: .\data\discretized-datasets-imola\kmeans\discr-20250114-imola.csv